summaryrefslogtreecommitdiffstats
path: root/modules/policy/lua-aho-corasick/tests
diff options
context:
space:
mode:
Diffstat (limited to 'modules/policy/lua-aho-corasick/tests')
-rw-r--r--modules/policy/lua-aho-corasick/tests/Makefile65
-rw-r--r--modules/policy/lua-aho-corasick/tests/ac_bench.cxx519
-rw-r--r--modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx135
-rw-r--r--modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx275
-rw-r--r--modules/policy/lua-aho-corasick/tests/dict/README.txt1
-rw-r--r--modules/policy/lua-aho-corasick/tests/dict/dict1.txt11
-rw-r--r--modules/policy/lua-aho-corasick/tests/load_ac_test.lua82
-rw-r--r--modules/policy/lua-aho-corasick/tests/lua_test.lua67
-rw-r--r--modules/policy/lua-aho-corasick/tests/test_base.hpp60
-rw-r--r--modules/policy/lua-aho-corasick/tests/test_bigfile.cxx167
-rw-r--r--modules/policy/lua-aho-corasick/tests/test_main.cxx33
11 files changed, 1415 insertions, 0 deletions
diff --git a/modules/policy/lua-aho-corasick/tests/Makefile b/modules/policy/lua-aho-corasick/tests/Makefile
new file mode 100644
index 0000000..54fd90f
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/Makefile
@@ -0,0 +1,65 @@
+OS := $(shell uname)
+ifeq ($(OS), Darwin)
+ SO_EXT := dylib
+else
+ SO_EXT := so
+endif
+
+.PHONY = all clean test runtest benchmark
+
+PROGRAM = ac_test
+BENCHMARK = ac_bench
+all: runtest
+
+CXXFLAGS = -O3 -g -march=native -Wall -DDEBUG
+MYCXXFLAGS = -MMD -I.. $(CXXFLAGS)
+%.o : %.cxx
+ $(CXX) $< -c $(MYCXXFLAGS)
+
+-include dep.cxx
+SRC = test_main.cxx ac_test_simple.cxx ac_test_aggr.cxx test_bigfile.cxx
+
+OBJ = ${SRC:.cxx=.o}
+
+-include test_dep.txt
+-include bench_dep.txt
+
+$(PROGRAM) $(BENCHMARK) : testinput/text.tar testinput/image.bin
+$(PROGRAM) : $(OBJ) ../libac.$(SO_EXT)
+ $(CXX) $(OBJ) -L.. -lac -o $@
+ -cat *.d > test_dep.txt
+
+$(BENCHMARK) : ac_bench.o ../libac.$(SO_EXT)
+ $(CXX) ac_bench.o -L.. -lac -o $@
+ -cat *.d > bench_dep.txt
+
+ifneq ($(OS), Darwin)
+runtest:$(PROGRAM)
+ LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):.. ./$(PROGRAM) testinput/*
+
+benchmark:$(BENCHMARK)
+ LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):.. ./ac_bench
+
+else
+runtest:$(PROGRAM)
+ DYLD_LIBRARY_PATH=$(DYLD_LIBRARY_PATH):.. ./$(PROGRAM) testinput/*
+
+benchmark:$(BENCHMARK)
+ DYLD_LIBRARY_PATH=$(DYLD_LIBRARY_PATH):.. ./ac_bench
+
+endif
+
+testinput/text.tar:
+ echo "download testing files (gcc tarball)..."
+ if [ ! -d testinput ] ; then mkdir testinput; fi
+ cd testinput && \
+ curl ftp://ftp.gnu.org/gnu/gcc/gcc-1.42.tar.gz -o text.tar.gz 2>/dev/null \
+ && gzip -d text.tar.gz
+
+testinput/image.bin:
+ echo "download testing files.."
+ if [ ! -d testinput ] ; then mkdir testinput; fi
+ curl http://www.3dvisionlive.com/sites/default/files/Curiosity_render_hiresb.jpg -o $@ 2>/dev/null
+
+clean:
+ -rm -f *.o *.d dep.txt $(PROGRAM) $(BENCHMARK)
diff --git a/modules/policy/lua-aho-corasick/tests/ac_bench.cxx b/modules/policy/lua-aho-corasick/tests/ac_bench.cxx
new file mode 100644
index 0000000..421322c
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/ac_bench.cxx
@@ -0,0 +1,519 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+#include <string>
+#include <vector>
+#include "ac.h"
+#include "ac_util.hpp"
+
+using namespace std;
+
+static bool SomethingWrong = false;
+
+static int iteration = 300;
+static string dict_dir;
+static string obj_file_dir;
+static bool print_help = false;
+static int piece_size = 1024;
+
+class PatternSet {
+public:
+ PatternSet(const char* filepath);
+ ~PatternSet() { Cleanup(); }
+
+ int getPatternNum() const { return _pat_num; }
+ const char** getPatternVector() const { return _patterns; }
+ unsigned int* getPatternLenVector() const { return _pat_len; }
+
+ const char* getErrMessage() const { return _errmsg; }
+ static bool isDictFile(const char* filepath) {
+ if (strncmp(basename(const_cast<char*>(filepath)), "dict", 4))
+ return false;
+ return true;
+ }
+
+private:
+ bool ExtractPattern(const char* filepath);
+ void Cleanup();
+
+ const char** _patterns;
+ unsigned int* _pat_len;
+ char* _mmap;
+ int _fd;
+ size_t _mmap_size;
+ int _pat_num;
+
+ const char* _errmsg;
+};
+
+bool
+PatternSet::ExtractPattern(const char* filepath) {
+ if (!isDictFile(filepath))
+ return false;
+
+ struct stat filestat;
+ if (stat(filepath, &filestat)) {
+ _errmsg = "fail to call stat()";
+ return false;
+ }
+
+ if (filestat.st_size > 4096 * 1024) {
+ /* It dosen't seem to be a dictionary file*/
+ _errmsg = "file too big?";
+ return false;
+ }
+
+ _fd = open(filepath, 0);
+ if (_fd == -1) {
+ _errmsg = "fail to open dictionary file";
+ return false;
+ }
+
+ _mmap_size = filestat.st_size;
+ _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE, _fd, 0);
+ if (_mmap == MAP_FAILED) {
+ _errmsg = "fail to call mmap";
+ return false;
+ }
+
+ const char* pat = _mmap;
+ vector<const char*> pat_vect;
+ vector<unsigned> pat_len_vect;
+
+ for (size_t i = 0, e = filestat.st_size; i < e; i++) {
+ if (_mmap[i] == '\r' || _mmap[i] == '\n') {
+ _mmap[i] = '\0';
+ int len = _mmap + i - pat;
+ if (len > 0) {
+ pat_vect.push_back(pat);
+ pat_len_vect.push_back(len);
+ }
+ pat = _mmap + i + 1;
+ }
+ }
+
+ ASSERT(pat_vect.size() == pat_len_vect.size());
+
+ int pat_num = pat_vect.size();
+ if (pat_num > 0) {
+ const char** p = _patterns = new const char*[pat_num];
+ int i = 0;
+ for (vector<const char*>::iterator iter = pat_vect.begin(),
+ iter_e = pat_vect.end(); iter != iter_e; ++iter) {
+ p[i++] = *iter;
+ }
+
+ i = 0;
+ unsigned int* q = _pat_len = new unsigned int[pat_num];
+ for (vector<unsigned>::iterator iter = pat_len_vect.begin(),
+ iter_e = pat_len_vect.end(); iter != iter_e; ++iter) {
+ q[i++] = *iter;
+ }
+ }
+
+ _pat_num = pat_num;
+ if (pat_num <= 0) {
+ _errmsg = "no pattern at all";
+ return false;
+ }
+
+ return true;
+}
+
+void
+PatternSet::Cleanup() {
+ if (_mmap != MAP_FAILED) {
+ munmap(_mmap, _mmap_size);
+ _mmap = (char*)MAP_FAILED;
+ _mmap_size = 0;
+ }
+
+ delete[] _patterns;
+ delete[] _pat_len;
+ if (_fd != -1)
+ close(_fd);
+ _pat_num = -1;
+}
+
+PatternSet::PatternSet(const char* filepath) {
+ _patterns = 0;
+ _pat_len = 0;
+ _mmap = (char*)MAP_FAILED;
+ _mmap_size = 0;
+ _pat_num = -1;
+ _errmsg = "";
+
+ if (!ExtractPattern(filepath))
+ Cleanup();
+}
+
+bool
+getFilesUnderDir(vector<string>& files, const char* path) {
+ files.clear();
+
+ DIR* dir = opendir(path);
+ if (!dir)
+ return false;
+
+ string path_dir = path;
+ path_dir += "/";
+
+ for (;;) {
+ struct dirent* entry = readdir(dir);
+ if (entry) {
+ string filepath = path_dir + entry->d_name;
+ struct stat file_stat;
+ if (stat(filepath.c_str(), &file_stat)) {
+ closedir(dir);
+ return false;
+ }
+
+ if (S_ISREG(file_stat.st_mode))
+ files.push_back(filepath);
+
+ continue;
+ }
+
+ if (errno) {
+ return false;
+ }
+ break;
+ }
+ closedir(dir);
+ return true;
+}
+
+class Timer {
+public:
+ Timer() {
+ my_clock_gettime(&_start);
+ _stop = _start;
+ _acc.tv_sec = 0;
+ _acc.tv_nsec = 0;
+ }
+
+ const Timer& operator += (const Timer& that) {
+ time_t sec = _acc.tv_sec + that._acc.tv_sec;
+ long nsec = _acc.tv_nsec + that._acc.tv_nsec;
+ if (nsec > 1000000000) {
+ nsec -= 1000000000;
+ sec += 1;
+ }
+ _acc.tv_sec = sec;
+ _acc.tv_nsec = nsec;
+ return *this;
+ }
+
+ // return duration in us
+ size_t getDuration() const {
+ return _acc.tv_sec * (size_t)1000000 + _acc.tv_nsec/1000;
+ }
+
+ void Start(bool acc=true) {
+ my_clock_gettime(&_start);
+ }
+
+ void Stop() {
+ my_clock_gettime(&_stop);
+ struct timespec t = CalcDuration();
+ _acc = add_duration(_acc, t);
+ }
+
+private:
+ int my_clock_gettime(struct timespec* t) {
+#ifdef __linux
+ return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t);
+#else
+ struct timeval tv;
+ int rc = gettimeofday(&tv, 0);
+ t->tv_sec = tv.tv_sec;
+ t->tv_nsec = tv.tv_usec * 1000;
+ return rc;
+#endif
+ }
+
+ struct timespec add_duration(const struct timespec& dur1,
+ const struct timespec& dur2) {
+ time_t sec = dur1.tv_sec + dur2.tv_sec;
+ long nsec = dur1.tv_nsec + dur2.tv_nsec;
+ if (nsec > 1000000000) {
+ nsec -= 1000000000;
+ sec += 1;
+ }
+ timespec t;
+ t.tv_sec = sec;
+ t.tv_nsec = nsec;
+
+ return t;
+ }
+
+ struct timespec CalcDuration() const {
+ timespec diff;
+ if ((_stop.tv_nsec - _start.tv_nsec)<0) {
+ diff.tv_sec = _stop.tv_sec - _start.tv_sec - 1;
+ diff.tv_nsec = 1000000000 + _stop.tv_nsec - _start.tv_nsec;
+ } else {
+ diff.tv_sec = _stop.tv_sec - _start.tv_sec;
+ diff.tv_nsec = _stop.tv_nsec - _start.tv_nsec;
+ }
+ return diff;
+ }
+
+ struct timespec _start;
+ struct timespec _stop;
+ struct timespec _acc;
+};
+
+class Benchmark {
+public:
+ Benchmark(const PatternSet& pat_set, const char* infile):
+ _pat_set(pat_set), _infile(infile) {
+ _mmap = (char*)MAP_FAILED;
+ _file_sz = 0;
+ _fd = -1;
+ }
+
+ ~Benchmark() {
+ if (_mmap != MAP_FAILED)
+ munmap(_mmap, _file_sz);
+ if (_fd != -1)
+ close(_fd);
+ }
+
+ bool Run(int iteration);
+ const Timer& getTimer() const { return _timer; }
+
+private:
+ const PatternSet& _pat_set;
+ const char* _infile;
+ char* _mmap;
+ int _fd;
+ size_t _file_sz; // input file size
+ Timer _timer;
+};
+
+bool
+Benchmark::Run(int iteration) {
+ if (_pat_set.getPatternNum() <= 0) {
+ SomethingWrong = true;
+ return false;
+ }
+
+ if (_mmap == MAP_FAILED) {
+ struct stat filestat;
+ if (stat(_infile, &filestat)) {
+ SomethingWrong = true;
+ return false;
+ }
+
+ if (!S_ISREG(filestat.st_mode)) {
+ SomethingWrong = true;
+ return false;
+ }
+
+ _fd = open(_infile, 0);
+ if (_fd == -1)
+ return false;
+
+ _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE, _fd, 0);
+
+ if (_mmap == MAP_FAILED) {
+ SomethingWrong = true;
+ return false;
+ }
+
+ _file_sz = filestat.st_size;
+ }
+
+ ac_t* ac = ac_create(_pat_set.getPatternVector(),
+ _pat_set.getPatternLenVector(),
+ _pat_set.getPatternNum());
+ if (!ac) {
+ SomethingWrong = true;
+ return false;
+ }
+
+ int piece_num = _file_sz/piece_size;
+
+ _timer.Start(false);
+
+ /* Stupid compiler may not be able to promote piece_size into register.
+ * Do it manually.
+ */
+ int piece_sz = piece_size;
+ for (int i = 0; i < iteration; i++) {
+ size_t match_ofst = 0;
+ for (int piece_idx = 0; piece_idx < piece_num; piece_idx ++) {
+ ac_match2(ac, _mmap + match_ofst, piece_sz);
+ match_ofst += piece_sz;
+ }
+ if (match_ofst != _file_sz)
+ ac_match2(ac, _mmap + match_ofst, _file_sz - match_ofst);
+ }
+ _timer.Stop();
+ return true;
+}
+
+const char* short_opt = "hd:f:i:p:";
+const struct option long_opts[] = {
+ {"help", no_argument, 0, 'h'},
+ {"iteration", required_argument, 0, 'i'},
+ {"dictionary-dir", required_argument, 0, 'd'},
+ {"obj-file-dir", required_argument, 0, 'f'},
+ {"piece-size", required_argument, 0, 'p'},
+};
+
+static void
+PrintHelp(const char* prog_name) {
+ const char* msg =
+"Usage %s [OPTIONS]\n"
+" -d, --dictionary-dir : specify the dictionary directory (./dict by default)\n"
+" -f, --obj-file-dir : specify the object file directory\n"
+" (./testinput by default)\n"
+" -i, --iteration : Run this many iteration for each pattern match\n"
+" -p, --piece-size : The size of 'piece' in byte. The input file is\n"
+" divided into pieces, and match function is working\n"
+" on one piece at a time. The default size of piece\n"
+" is 1k byte.\n";
+
+ fprintf(stdout, msg, prog_name);
+}
+
+static bool
+getOptions(int argc, char** argv) {
+ bool dict_dir_set = false;
+ bool objfile_dir_set = false;
+ int opt_index;
+
+ while (1) {
+ if (print_help) break;
+
+ int c = getopt_long(argc, argv, short_opt, long_opts, &opt_index);
+
+ if (c == -1) break;
+ if (c == 0) { c = long_opts[opt_index].val; }
+
+ switch(c) {
+ case 'h':
+ print_help = true;
+ break;
+
+ case 'i':
+ iteration = atol(optarg);
+ break;
+
+ case 'd':
+ dict_dir = optarg;
+ dict_dir_set = true;
+ break;
+
+ case 'f':
+ obj_file_dir = optarg;
+ objfile_dir_set = true;
+ break;
+
+ case 'p':
+ piece_size = atol(optarg);
+ break;
+
+ case '?':
+ default:
+ return false;
+ }
+ }
+
+ if (print_help)
+ return true;
+
+ string basedir(dirname(argv[0]));
+ if (!dict_dir_set)
+ dict_dir = basedir + "/dict";
+
+ if (!objfile_dir_set)
+ obj_file_dir = basedir + "/testinput";
+
+ return true;
+}
+
+int
+main(int argc, char** argv) {
+ if (!getOptions(argc, argv))
+ return -1;
+
+ if (print_help) {
+ PrintHelp(argv[0]);
+ return 0;
+ }
+
+#ifndef __linux
+ fprintf(stdout, "\n!!!WARNING: On this OS, the execution time is measured"
+ " by gettimeofday(2) which is imprecise!!!\n\n");
+#endif
+
+ fprintf(stdout, "Test with iteration = %d, piece size = %d, and",
+ iteration, piece_size);
+ fprintf(stdout, "\n dictionary dir = %s\n object file dir = %s\n\n",
+ dict_dir.c_str(), obj_file_dir.c_str());
+
+ vector<string> dict_files;
+ vector<string> input_files;
+
+ if (!getFilesUnderDir(dict_files, dict_dir.c_str())) {
+ fprintf(stdout, "fail to find dictionary files\n");
+ return -1;
+ }
+
+ if (!getFilesUnderDir(input_files, obj_file_dir.c_str())) {
+ fprintf(stdout, "fail to find test input files\n");
+ return -1;
+ }
+
+ for (vector<string>::iterator diter = dict_files.begin(),
+ diter_e = dict_files.end(); diter != diter_e; ++diter) {
+
+ const char* dict_name = diter->c_str();
+ if (!PatternSet::isDictFile(dict_name))
+ continue;
+
+ PatternSet ps(dict_name);
+ if (ps.getPatternNum() <= 0) {
+ fprintf(stdout, "fail to open dictionary file %s : %s\n",
+ dict_name, ps.getErrMessage());
+ SomethingWrong = true;
+ continue;
+ }
+
+ fprintf(stdout, "Using dictionary %s\n", dict_name);
+ Timer timer;
+ for (vector<string>::iterator iter = input_files.begin(),
+ iter_e = input_files.end(); iter != iter_e; ++iter) {
+ fprintf(stdout, " testing %s ... ", iter->c_str());
+ fflush(stdout);
+ Benchmark bm(ps, iter->c_str());
+ bm.Run(iteration);
+ const Timer& t = bm.getTimer();
+ timer += bm.getTimer();
+ fprintf(stdout, "elapsed %.3f\n", t.getDuration() / 1000000.0);
+ }
+
+ fprintf(stdout,
+ "\n==========================================================\n"
+ " Total Elapse %.3f\n\n", timer.getDuration() / 1000000.0);
+ }
+
+ return SomethingWrong ? -1 : 0;
+}
diff --git a/modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx b/modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx
new file mode 100644
index 0000000..4ea02bc
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx
@@ -0,0 +1,135 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <vector>
+#include <string>
+
+#include "ac.h"
+#include "ac_util.hpp"
+#include "test_base.hpp"
+
+using namespace std;
+
+namespace {
+class ACBigFileTester : public BigFileTester {
+public:
+ ACBigFileTester(const char* filepath) : BigFileTester(filepath){};
+
+private:
+ virtual buf_header_t* PM_Create(const char** strv, uint32* strlenv,
+ uint32 vect_len) {
+ return (buf_header_t*)ac_create(strv, strlenv, vect_len);
+ }
+
+ virtual void PM_Free(buf_header_t* PM) { ac_free(PM); }
+ virtual bool Run_Helper(buf_header_t* PM);
+};
+
+class ACTestAggressive: public ACTestBase {
+public:
+ ACTestAggressive(const vector<const char*>& files, const char* banner)
+ : ACTestBase(banner), _files(files) {}
+ virtual bool Run();
+
+private:
+ void PrintSummary(int total, int fail) {
+ fprintf(stdout, "Test count : %d, fail: %d\n", total, fail);
+ fflush(stdout);
+ }
+ vector<const char*> _files;
+};
+
+} // end of anonymous namespace
+
+bool
+ACBigFileTester::Run_Helper(buf_header_t* PM) {
+ int fail = 0;
+ // advance one chunk at a time.
+ int len = _msg_len;
+ int chunk_sz = _chunk_sz;
+
+ vector<const char*> c_style_keys;
+ for (int i = 0, e = _keys.size(); i != e; i++) {
+ const char* key = _keys[i].first;
+ int len = _keys[i].second;
+ char *t = new char[len+1];
+ memcpy(t, key, len);
+ t[len] = '\0';
+ c_style_keys.push_back(t);
+ }
+
+ for (int ofst = 0, chunk_idx = 0, chunk_num = _chunk_num;
+ chunk_idx < chunk_num; ofst += chunk_sz, chunk_idx++) {
+ const char* substring = _msg + ofst;
+ ac_result_t r = ac_match((ac_t*)(void*)PM, substring , len - ofst);
+ int m_b = r.match_begin;
+ int m_e = r.match_end;
+
+ if (m_b < 0 || m_e < 0 || m_e <= m_b || m_e >= len) {
+ fprintf(stdout, "fail to find match substring[%d:%d])\n",
+ ofst, len - 1);
+ fail ++;
+ continue;
+ }
+
+ const char* match_str = _msg + len;
+ int strstr_len = 0;
+ int key_idx = -1;
+
+ for (int i = 0, e = c_style_keys.size(); i != e; i++) {
+ const char* key = c_style_keys[i];
+ if (const char *m = strstr(substring, key)) {
+ if (m < match_str) {
+ match_str = m;
+ strstr_len = _keys[i].second;
+ key_idx = i;
+ }
+ }
+ }
+ ASSERT(key_idx != -1);
+ if ((match_str - substring != m_b)) {
+ fprintf(stdout,
+ "Fail to find match substring[%d:%d]),"
+ " expected to find match at offset %d instead of %d\n",
+ ofst, len - 1,
+ (int)(match_str - _msg), ofst + m_b);
+ fprintf(stdout, "%d vs %d (key idx %d)\n", strstr_len, m_e - m_b + 1, key_idx);
+ PrintStr(stdout, match_str, strstr_len);
+ fprintf(stdout, "\n");
+ PrintStr(stdout, _msg + ofst + m_b,
+ m_e - m_b + 1);
+ fprintf(stdout, "\n");
+ fail ++;
+ }
+ }
+ for (vector<const char*>::iterator i = c_style_keys.begin(),
+ e = c_style_keys.end(); i != e; i++) {
+ delete[] *i;
+ }
+
+ return fail == 0;
+}
+
+bool
+ACTestAggressive::Run() {
+ int fail = 0;
+ for (vector<const char*>::iterator i = _files.begin(), e = _files.end();
+ i != e; i++) {
+ ACBigFileTester bft(*i);
+ if (!bft.Run())
+ fail ++;
+ }
+ return fail == 0;
+}
+
+bool
+Run_AC_Aggressive_Test(const vector<const char*>& files) {
+ ACTestAggressive t(files, "AC Aggressive test");
+ t.PrintBanner();
+ return t.Run();
+}
diff --git a/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx b/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx
new file mode 100644
index 0000000..fa2d7fd
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx
@@ -0,0 +1,275 @@
+#include <stdio.h>
+#include <string.h>
+#include <vector>
+#include <string>
+
+#include "ac.h"
+#include "ac_util.hpp"
+#include "test_base.hpp"
+
+using namespace std;
+
+namespace {
+typedef struct {
+ const char* str;
+ const char* match;
+} StrPair;
+
+typedef enum {
+ MV_FIRST_MATCH = 0,
+ MV_LEFT_LONGEST = 1,
+} MatchVariant;
+
+typedef struct {
+ const char* name;
+ const char** dict;
+ StrPair* strpairs;
+ int dict_len;
+ int strpair_num;
+ MatchVariant match_variant;
+} TestingCase;
+
+class Tests {
+public:
+ Tests(const char* name,
+ const char* dict[], int dict_len,
+ StrPair strpairs[], int strpair_num,
+ MatchVariant mv = MV_FIRST_MATCH) {
+ if (!_tests)
+ _tests = new vector<TestingCase>;
+
+ TestingCase tc;
+ tc.name = name;
+ tc.dict = dict;
+ tc.strpairs = strpairs;
+ tc.dict_len = dict_len;
+ tc.strpair_num = strpair_num;
+ tc.match_variant = mv;
+ _tests->push_back(tc);
+ }
+
+ static vector<TestingCase>* Get_Tests() { return _tests; }
+ static void Erase_Tests() { delete _tests; _tests = 0; }
+
+private:
+ static vector<TestingCase> *_tests;
+};
+
+class LeftLongestTests : public Tests {
+public:
+ LeftLongestTests (const char* name, const char* dict[], int dict_len,
+ StrPair strpairs[], int strpair_num):
+ Tests(name, dict, dict_len, strpairs, strpair_num, MV_LEFT_LONGEST) {
+ }
+};
+
+vector<TestingCase>* Tests::_tests = 0;
+
+class ACTestSimple: public ACTestBase {
+public:
+ ACTestSimple(const char* banner) : ACTestBase(banner) {}
+ virtual bool Run();
+
+private:
+ void PrintSummary(int total, int fail) {
+ fprintf(stdout, "Test count : %d, fail: %d\n", total, fail);
+ fflush(stdout);
+ }
+};
+}
+
+bool
+ACTestSimple::Run() {
+ int total = 0;
+ int fail = 0;
+
+ vector<TestingCase> *tests = Tests::Get_Tests();
+ if (!tests) {
+ PrintSummary(0, 0);
+ return true;
+ }
+
+ for (vector<TestingCase>::iterator i = tests->begin(), e = tests->end();
+ i != e; i++) {
+ TestingCase& t = *i;
+ int dict_len = t.dict_len;
+ unsigned int* strlen_v = new unsigned int[dict_len];
+
+ fprintf(stdout, ">Testing %s\nDictionary:[ ", t.name);
+ for (int i = 0, need_break=0; i < dict_len; i++) {
+ const char* s = t.dict[i];
+ fprintf(stdout, "%s, ", s);
+ strlen_v[i] = strlen(s);
+ if (need_break++ == 16) {
+ fputs("\n ", stdout);
+ need_break = 0;
+ }
+ }
+ fputs("]\n", stdout);
+
+ /* Create the dictionary */
+ ac_t* ac = ac_create(t.dict, strlen_v, dict_len);
+ delete[] strlen_v;
+
+ for (int ii = 0, ee = t.strpair_num; ii < ee; ii++, total++) {
+ const StrPair& sp = t.strpairs[ii];
+ const char *str = sp.str; // the string to be matched
+ const char *match = sp.match;
+
+ fprintf(stdout, "[%3d] Testing '%s' : ", total, str);
+
+ int len = strlen(str);
+ ac_result_t r;
+ if (t.match_variant == MV_FIRST_MATCH)
+ r = ac_match(ac, str, len);
+ else if (t.match_variant == MV_LEFT_LONGEST)
+ r = ac_match_longest_l(ac, str, len);
+ else {
+ ASSERT(false && "Unknown variant");
+ }
+
+ int m_b = r.match_begin;
+ int m_e = r.match_end;
+
+ // The return value per se is insane.
+ if (m_b > m_e ||
+ ((m_b < 0 || m_e < 0) && (m_b != -1 || m_e != -1))) {
+ fprintf(stdout, "Insane return value (%d, %d)\n", m_b, m_e);
+ fail ++;
+ continue;
+ }
+
+ // If the string is not supposed to match the dictionary.
+ if (!match) {
+ if (m_b != -1 || m_e != -1) {
+ fail ++;
+ fprintf(stdout, "Not Supposed to match (%d, %d) \n",
+ m_b, m_e);
+ } else
+ fputs("Pass\n", stdout);
+ continue;
+ }
+
+ // The string or its substring is match the dict.
+ if (m_b >= len || m_b >= len) {
+ fail ++;
+ fprintf(stdout,
+ "Return value >= the length of the string (%d, %d)\n",
+ m_b, m_e);
+ continue;
+ } else {
+ int mlen = strlen(match);
+ if ((mlen != m_e - m_b + 1) ||
+ strncmp(str + m_b, match, mlen)) {
+ fail ++;
+ fprintf(stdout, "Fail\n");
+ } else
+ fprintf(stdout, "Pass\n");
+ }
+ }
+ fputs("\n", stdout);
+ ac_free(ac);
+ }
+
+ PrintSummary(total, fail);
+ return fail == 0;
+}
+
+bool
+Run_AC_Simple_Test() {
+ ACTestSimple t("AC Simple test");
+ t.PrintBanner();
+ return t.Run();
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Testing cases for first-match variant (i.e. test ac_match())
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+
+/* test 1*/
+const char *dict1[] = {"he", "she", "his", "her"};
+StrPair strpair1[] = {
+ {"he", "he"}, {"she", "she"}, {"his", "his"},
+ {"hers", "he"}, {"ahe", "he"}, {"shhe", "he"},
+ {"shis2", "his"}, {"ahhe", "he"}
+};
+Tests test1("test 1",
+ dict1, sizeof(dict1)/sizeof(dict1[0]),
+ strpair1, sizeof(strpair1)/sizeof(strpair1[0]));
+
+/* test 2*/
+const char *dict2[] = {"poto", "poto"}; /* duplicated strings*/
+StrPair strpair2[] = {{"The pot had a handle", 0}};
+Tests test2("test 2", dict2, 2, strpair2, 1);
+
+/* test 3*/
+const char *dict3[] = {"The"};
+StrPair strpair3[] = {{"The pot had a handle", "The"}};
+Tests test3("test 3", dict3, 1, strpair3, 1);
+
+/* test 4*/
+const char *dict4[] = {"pot"};
+StrPair strpair4[] = {{"The pot had a handle", "pot"}};
+Tests test4("test 4", dict4, 1, strpair4, 1);
+
+/* test 5*/
+const char *dict5[] = {"pot "};
+StrPair strpair5[] = {{"The pot had a handle", "pot "}};
+Tests test5("test 5", dict5, 1, strpair5, 1);
+
+/* test 6*/
+const char *dict6[] = {"ot h"};
+StrPair strpair6[] = {{"The pot had a handle", "ot h"}};
+Tests test6("test 6", dict6, 1, strpair6, 1);
+
+/* test 7*/
+const char *dict7[] = {"andle"};
+StrPair strpair7[] = {{"The pot had a handle", "andle"}};
+Tests test7("test 7", dict7, 1, strpair7, 1);
+
+const char *dict8[] = {"aaab"};
+StrPair strpair8[] = {{"aaaaaaab", "aaab"}};
+Tests test8("test 8", dict8, 1, strpair8, 1);
+
+const char *dict9[] = {"haha", "z"};
+StrPair strpair9[] = {{"aaaaz", "z"}, {"z", "z"}};
+Tests test9("test 9", dict9, 2, strpair9, 2);
+
+/* test the case when input string dosen't contain even a single char
+ * of the pattern in dictionary.
+ */
+const char *dict10[] = {"abc"};
+StrPair strpair10[] = {{"cde", 0}};
+Tests test10("test 10", dict10, 1, strpair10, 1);
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Testing cases for first longest match variant (i.e.
+// test ac_match_longest_l())
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+
+// This was actually first motivation for left-longest-match
+const char *dict100[] = {"Mozilla", "Mozilla Mobile"};
+StrPair strpair100[] = {{"User Agent containing string Mozilla Mobile", "Mozilla Mobile"}};
+LeftLongestTests test100("l_test 100", dict100, 2, strpair100, 1);
+
+// Dict with single char is tricky
+const char *dict101[] = {"a", "abc"};
+StrPair strpair101[] = {{"abcdef", "abc"}};
+LeftLongestTests test101("l_test 101", dict101, 2, strpair101, 1);
+
+// Testing case with partially overlapping patterns. The purpose is to
+// check if the fail-link leading from terminal state is correct.
+//
+// The fail-link leading from terminal-state does not matter in
+// match-first-occurrence variant, as it stop when a terminal is hit.
+//
+const char *dict102[] = {"abc", "bcdef"};
+StrPair strpair102[] = {{"abcdef", "bcdef"}};
+LeftLongestTests test102("l_test 102", dict102, 2, strpair102, 1);
diff --git a/modules/policy/lua-aho-corasick/tests/dict/README.txt b/modules/policy/lua-aho-corasick/tests/dict/README.txt
new file mode 100644
index 0000000..cd50b41
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/dict/README.txt
@@ -0,0 +1 @@
+This directory contains pattern set of benchmark purpose.
diff --git a/modules/policy/lua-aho-corasick/tests/dict/dict1.txt b/modules/policy/lua-aho-corasick/tests/dict/dict1.txt
new file mode 100644
index 0000000..94085a9
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/dict/dict1.txt
@@ -0,0 +1,11 @@
+false_return@
+forloop#haha
+wtfprogram
+mmaporunmap
+ThIs?Module!IsEssential
+struct rtlwtf
+gettIMEOfdayWrong
+edistribution_and_use_in_@source
+Copyright~#@
+while {!
+!%SQLinje
diff --git a/modules/policy/lua-aho-corasick/tests/load_ac_test.lua b/modules/policy/lua-aho-corasick/tests/load_ac_test.lua
new file mode 100644
index 0000000..7fb7db9
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/load_ac_test.lua
@@ -0,0 +1,82 @@
+-- This script is to test load_ac.lua
+--
+-- Some notes:
+-- 1. The purpose of this script is not to check if the libac.so work
+-- properly, it is to check if there are something stupid in load_ac.lua
+--
+-- 2. There are bunch of collectgarbage() calls, the purpose is to make
+-- sure the shared lib is not unloaded after GC.
+
+-- load_ac.lua looks up libac.so via package.cpath rather than LD_LIBRARY_PATH,
+-- prepend (instead of appending) some insane paths here to see if it quit
+-- prematurely.
+--
+package.cpath = ".;./?.so;" .. package.cpath
+
+local ac = require "load_ac"
+
+local ac_create = ac.create_ac
+local ac_match = ac.match
+local string_fmt = string.format
+local string_sub = string.sub
+
+local err_cnt = 0
+local function mytest(testname, dict, match, notmatch)
+ print(">Testing ", testname)
+
+ io.write(string_fmt("Dictionary: "));
+ for i=1, #dict do
+ io.write(string_fmt("%s, ", dict[i]))
+ end
+ print ""
+
+ local ac_inst = ac_create(dict);
+ collectgarbage()
+ for i=1, #match do
+ local str = match[i]
+ io.write(string_fmt("Matching %s, ", str))
+ local b = ac_match(ac_inst, str)
+ if b then
+ print "pass"
+ else
+ err_cnt = err_cnt + 1
+ print "fail"
+ end
+ collectgarbage()
+ end
+
+ if notmatch == nil then
+ return
+ end
+
+ collectgarbage()
+
+ for i = 1, #notmatch do
+ local str = notmatch[i]
+ io.write(string_fmt("*Matching %s, ", str))
+ local r = ac_match(ac_inst, str)
+ if r then
+ err_cnt = err_cnt + 1
+ print("fail")
+ else
+ print("succ")
+ end
+ collectgarbage()
+ end
+ ac_inst = nil
+ collectgarbage()
+end
+
+print("")
+print("====== Test to see if load_ac.lua works properly ========")
+
+mytest("test1",
+ {"he", "she", "his", "her", "str\0ing"},
+ -- matching cases
+ { "he", "she", "his", "hers", "ahe", "shhe", "shis2", "ahhe", "str\0ing" },
+
+ -- not matching case
+ {"str\0", "str"}
+ )
+
+os.exit((err_cnt == 0) and 0 or 1)
diff --git a/modules/policy/lua-aho-corasick/tests/lua_test.lua b/modules/policy/lua-aho-corasick/tests/lua_test.lua
new file mode 100644
index 0000000..cfe178f
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/lua_test.lua
@@ -0,0 +1,67 @@
+-- This script is to test ahocorasick.so not libac.so
+--
+local ac = require "ahocorasick"
+
+local ac_create = ac.create
+local ac_match = ac.match
+local string_fmt = string.format
+local string_sub = string.sub
+
+local err_cnt = 0
+local function mytest(testname, dict, match, notmatch)
+ print(">Testing ", testname)
+
+ io.write(string_fmt("Dictionary: "));
+ for i=1, #dict do
+ io.write(string_fmt("%s, ", dict[i]))
+ end
+ print ""
+
+ local ac_inst = ac_create(dict);
+ for i=1, #match do
+ local str = match[i][1]
+ local substr = match[i][2]
+ io.write(string_fmt("Matching %s, ", str))
+ local b, e = ac_match(ac_inst, str)
+ if b and e and (string_sub(str, b+1, e+1) == substr) then
+ print "pass"
+ else
+ err_cnt = err_cnt + 1
+ print "fail"
+ end
+ --print("gc is called")
+ collectgarbage()
+ end
+
+ if notmatch == nil then
+ return
+ end
+
+ for i = 1, #notmatch do
+ local str = notmatch[i]
+ io.write(string_fmt("*Matching %s, ", str))
+ local r = ac_match(ac_inst, str)
+ if r then
+ err_cnt = err_cnt + 1
+ print("fail")
+ else
+ print("succ")
+ end
+ collectgarbage()
+ end
+end
+
+mytest("test1",
+ {"he", "she", "his", "her", "str\0ing"},
+ -- matching cases
+ { {"he", "he"}, {"she", "she"}, {"his", "his"}, {"hers", "he"},
+ {"ahe", "he"}, {"shhe", "he"}, {"shis2", "his"}, {"ahhe", "he"},
+ {"str\0ing", "str\0ing"}
+ },
+
+ -- not matching case
+ {"str\0", "str"}
+
+ )
+
+os.exit((err_cnt == 0) and 0 or 1)
diff --git a/modules/policy/lua-aho-corasick/tests/test_base.hpp b/modules/policy/lua-aho-corasick/tests/test_base.hpp
new file mode 100644
index 0000000..7758371
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/test_base.hpp
@@ -0,0 +1,60 @@
+#ifndef TEST_BASE_H
+#define TEST_BASE_H
+
+#include <stdio.h>
+#include <string>
+#include <stdint.h>
+
+using namespace std;
+class ACTestBase {
+public:
+ ACTestBase(const char* name) :_banner(name) {}
+ virtual void PrintBanner() {
+ fprintf(stdout, "\n===== %s ====\n", _banner.c_str());
+ }
+
+ virtual bool Run() = 0;
+private:
+ string _banner;
+};
+
+typedef std::pair<const char*, int> StrInfo;
+class BigFileTester {
+public:
+ BigFileTester(const char* filepath);
+ virtual ~BigFileTester() { Cleanup(); }
+
+ bool Run();
+
+protected:
+ virtual buf_header_t* PM_Create(const char** strv, uint32_t* strlenv,
+ uint32_t vect_len) = 0;
+ virtual void PM_Free(buf_header_t*) = 0;
+ virtual bool Run_Helper(buf_header_t* PM) = 0;
+
+ // Return true if the '\0' is valid char of a string.
+ virtual bool Str_C_Style() { return true; }
+
+ bool GenerateKeys();
+ void Cleanup();
+ void PrintStr(FILE*, const char* str, int len);
+
+protected:
+ const char* _filepath;
+ int _fd;
+ vector<StrInfo> _keys;
+ char* _msg;
+ int _msg_len;
+ int _key_num; // number of strings in dictionary
+ int _chunk_sz;
+ int _chunk_num;
+
+ int _max_key_num;
+ int _key_min_len;
+ int _key_max_len;
+};
+
+extern bool Run_AC_Simple_Test();
+extern bool Run_AC_Aggressive_Test(const vector<const char*>& files);
+
+#endif
diff --git a/modules/policy/lua-aho-corasick/tests/test_bigfile.cxx b/modules/policy/lua-aho-corasick/tests/test_bigfile.cxx
new file mode 100644
index 0000000..f189d8d
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/test_bigfile.cxx
@@ -0,0 +1,167 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <vector>
+#include <string>
+
+#include "ac.h"
+#include "ac_util.hpp"
+#include "test_base.hpp"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// Implementation of BigFileTester
+//
+///////////////////////////////////////////////////////////////////////////
+//
+BigFileTester::BigFileTester(const char* filepath) {
+ _filepath = filepath;
+ _fd = -1;
+ _msg = (char*)MAP_FAILED;
+ _msg_len = 0;
+ _key_num = 0;
+ _chunk_sz = 0;
+ _chunk_num = 0;
+
+ _max_key_num = 100;
+ _key_min_len = 20;
+ _key_max_len = 80;
+}
+
+void
+BigFileTester::Cleanup() {
+ if (_msg != MAP_FAILED) {
+ munmap((void*)_msg, _msg_len);
+ _msg = (char*)MAP_FAILED;
+ _msg_len = 0;
+ }
+
+ if (_fd != -1) {
+ close(_fd);
+ _fd = -1;
+ }
+}
+
+bool
+BigFileTester::GenerateKeys() {
+ int chunk_sz = 4096;
+ int max_key_num = _max_key_num;
+ int key_min_len = _key_min_len;
+ int key_max_len = _key_max_len;
+
+ int t = _msg_len / chunk_sz;
+ int keynum = t > max_key_num ? max_key_num : t;
+
+ if (keynum <= 4) {
+ // file is too small
+ return false;
+ }
+ chunk_sz = _msg_len / keynum;
+ _chunk_sz = chunk_sz;
+
+ // For each chunck, "randomly" grab a sub-string searving
+ // as key.
+ int random_ofst[] = { 12, 30, 23, 15 };
+ int rofstsz = sizeof(random_ofst)/sizeof(random_ofst[0]);
+ int ofst = 0;
+ const char* msg = _msg;
+ _chunk_num = keynum - 1;
+ for (int idx = 0, e = _chunk_num; idx < e; idx++) {
+ const char* key = msg + ofst + idx % rofstsz;
+ int key_len = key_min_len + idx % (key_max_len - key_min_len);
+ _keys.push_back(StrInfo(key, key_len));
+ ofst += chunk_sz;
+ }
+ return true;
+}
+
+bool
+BigFileTester::Run() {
+ // Step 1: Bring the file into memory
+ fprintf(stdout, "Testing using file '%s'...\n", _filepath);
+
+ int fd = _fd = ::open(_filepath, O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ return false;
+ }
+
+ struct stat sb;
+ if (fstat(fd, &sb) == -1) {
+ perror("fstat");
+ return false;
+ }
+
+ if (!S_ISREG (sb.st_mode)) {
+ fprintf(stderr, "%s is not regular file\n", _filepath);
+ return false;
+ }
+
+ int ten_M = 1024 * 1024 * 10;
+ int map_sz = _msg_len = sb.st_size > ten_M ? ten_M : sb.st_size;
+ char* p = _msg =
+ (char*)mmap (0, map_sz, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ return false;
+ }
+
+ // Get rid of '\0' if we are picky at it.
+ if (Str_C_Style()) {
+ for (int i = 0; i < map_sz; i++) { if (!p[i]) p[i] = 'a'; }
+ p[map_sz - 1] = 0;
+ }
+
+ // Step 2: "Fabricate" some keys from the file.
+ if (!GenerateKeys()) {
+ close(fd);
+ return false;
+ }
+
+ // Step 3: Create PM instance
+ const char** keys = new const char*[_keys.size()];
+ unsigned int* keylens = new unsigned int[_keys.size()];
+
+ int i = 0;
+ for (vector<StrInfo>::iterator si = _keys.begin(), se = _keys.end();
+ si != se; si++, i++) {
+ const StrInfo& strinfo = *si;
+ keys[i] = strinfo.first;
+ keylens[i] = strinfo.second;
+ }
+
+ buf_header_t* PM = PM_Create(keys, keylens, i);
+ delete[] keys;
+ delete[] keylens;
+
+ // Step 4: Run testing
+ bool res = Run_Helper(PM);
+ PM_Free(PM);
+
+ // Step 5: Clanup
+ munmap(p, map_sz);
+ _msg = (char*)MAP_FAILED;
+ close(fd);
+ _fd = -1;
+
+ fprintf(stdout, "%s\n", res ? "succ" : "fail");
+ return res;
+}
+
+void
+BigFileTester::PrintStr(FILE* f, const char* str, int len) {
+ fprintf(f, "{");
+ for (int i = 0; i < len; i++) {
+ unsigned char c = str[i];
+ if (isprint(c))
+ fprintf(f, "'%c', ", c);
+ else
+ fprintf(f, "%#x, ", c);
+ }
+ fprintf(f, "}");
+};
diff --git a/modules/policy/lua-aho-corasick/tests/test_main.cxx b/modules/policy/lua-aho-corasick/tests/test_main.cxx
new file mode 100644
index 0000000..b4f5225
--- /dev/null
+++ b/modules/policy/lua-aho-corasick/tests/test_main.cxx
@@ -0,0 +1,33 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <vector>
+#include <string>
+#include "ac.h"
+#include "ac_util.hpp"
+#include "test_base.hpp"
+
+using namespace std;
+
+
+/////////////////////////////////////////////////////////////////////////
+//
+// Simple (yet maybe tricky) testings
+//
+/////////////////////////////////////////////////////////////////////////
+//
+int
+main (int argc, char** argv) {
+ bool succ = Run_AC_Simple_Test();
+
+ vector<const char*> files;
+ for (int i = 1; i < argc; i++) { files.push_back(argv[i]); }
+ succ = Run_AC_Aggressive_Test(files) && succ;
+
+ return succ ? 0 : -1;
+};