diff options
Diffstat (limited to 'modules/policy/lua-aho-corasick/tests')
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/Makefile | 65 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/ac_bench.cxx | 519 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx | 135 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx | 275 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/dict/README.txt | 1 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/dict/dict1.txt | 11 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/load_ac_test.lua | 82 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/lua_test.lua | 67 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/test_base.hpp | 60 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/test_bigfile.cxx | 167 | ||||
-rw-r--r-- | modules/policy/lua-aho-corasick/tests/test_main.cxx | 33 |
11 files changed, 1415 insertions, 0 deletions
diff --git a/modules/policy/lua-aho-corasick/tests/Makefile b/modules/policy/lua-aho-corasick/tests/Makefile new file mode 100644 index 0000000..54fd90f --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/Makefile @@ -0,0 +1,65 @@ +OS := $(shell uname) +ifeq ($(OS), Darwin) + SO_EXT := dylib +else + SO_EXT := so +endif + +# Phony targets are declared with a rule (".PHONY:"); an assignment would only create a variable named .PHONY. +.PHONY: all clean test runtest benchmark + +PROGRAM = ac_test +BENCHMARK = ac_bench +all: runtest + +CXXFLAGS = -O3 -g -march=native -Wall -DDEBUG +MYCXXFLAGS = -MMD -I.. $(CXXFLAGS) +%.o : %.cxx + $(CXX) $< -c $(MYCXXFLAGS) + +-include dep.cxx +SRC = test_main.cxx ac_test_simple.cxx ac_test_aggr.cxx test_bigfile.cxx + +OBJ = ${SRC:.cxx=.o} + +-include test_dep.txt +-include bench_dep.txt + +$(PROGRAM) $(BENCHMARK) : testinput/text.tar testinput/image.bin +$(PROGRAM) : $(OBJ) ../libac.$(SO_EXT) + $(CXX) $(OBJ) -L.. -lac -o $@ + -cat *.d > test_dep.txt + +$(BENCHMARK) : ac_bench.o ../libac.$(SO_EXT) + $(CXX) ac_bench.o -L.. -lac -o $@ + -cat *.d > bench_dep.txt + +ifneq ($(OS), Darwin) +runtest:$(PROGRAM) + LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):.. ./$(PROGRAM) testinput/* + +benchmark:$(BENCHMARK) + LD_LIBRARY_PATH=$(LD_LIBRARY_PATH):.. ./ac_bench + +else +runtest:$(PROGRAM) + DYLD_LIBRARY_PATH=$(DYLD_LIBRARY_PATH):.. ./$(PROGRAM) testinput/* + +benchmark:$(BENCHMARK) + DYLD_LIBRARY_PATH=$(DYLD_LIBRARY_PATH):.. ./ac_bench + +endif + +testinput/text.tar: + echo "download testing files (gcc tarball)..." + if [ ! -d testinput ] ; then mkdir testinput; fi + cd testinput && \ + curl ftp://ftp.gnu.org/gnu/gcc/gcc-1.42.tar.gz -o text.tar.gz 2>/dev/null \ + && gzip -d text.tar.gz + +testinput/image.bin: + echo "download testing files.." + if [ ! 
-d testinput ] ; then mkdir testinput; fi + curl http://www.3dvisionlive.com/sites/default/files/Curiosity_render_hiresb.jpg -o $@ 2>/dev/null + +clean: + -rm -f *.o *.d dep.txt $(PROGRAM) $(BENCHMARK) diff --git a/modules/policy/lua-aho-corasick/tests/ac_bench.cxx b/modules/policy/lua-aho-corasick/tests/ac_bench.cxx new file mode 100644 index 0000000..421322c --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/ac_bench.cxx @@ -0,0 +1,519 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <time.h> +#include <fcntl.h> +#include <unistd.h> +#include <dirent.h> +#include <libgen.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> + +#include <string> +#include <vector> +#include "ac.h" +#include "ac_util.hpp" + +using namespace std; + +static bool SomethingWrong = false; + +static int iteration = 300; +static string dict_dir; +static string obj_file_dir; +static bool print_help = false; +static int piece_size = 1024; + +class PatternSet { +public: + PatternSet(const char* filepath); + ~PatternSet() { Cleanup(); } + + int getPatternNum() const { return _pat_num; } + const char** getPatternVector() const { return _patterns; } + unsigned int* getPatternLenVector() const { return _pat_len; } + + const char* getErrMessage() const { return _errmsg; } + static bool isDictFile(const char* filepath) { + if (strncmp(basename(const_cast<char*>(filepath)), "dict", 4)) + return false; + return true; + } + +private: + bool ExtractPattern(const char* filepath); + void Cleanup(); + + const char** _patterns; + unsigned int* _pat_len; + char* _mmap; + int _fd; + size_t _mmap_size; + int _pat_num; + + const char* _errmsg; +}; + +bool +PatternSet::ExtractPattern(const char* filepath) { + if (!isDictFile(filepath)) + return false; + + struct stat filestat; + if (stat(filepath, &filestat)) { + _errmsg = "fail to call stat()"; + return false; + } + + if (filestat.st_size > 4096 
* 1024) { + /* It doesn't seem to be a dictionary file */ + _errmsg = "file too big?"; + return false; + } + + _fd = open(filepath, 0); + if (_fd == -1) { + _errmsg = "fail to open dictionary file"; + return false; + } + + _mmap_size = filestat.st_size; + _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE, _fd, 0); + if (_mmap == MAP_FAILED) { + _errmsg = "fail to call mmap"; + return false; + } + + const char* pat = _mmap; + vector<const char*> pat_vect; + vector<unsigned> pat_len_vect; + + for (size_t i = 0, e = filestat.st_size; i < e; i++) { + if (_mmap[i] == '\r' || _mmap[i] == '\n') { + _mmap[i] = '\0'; + int len = _mmap + i - pat; + if (len > 0) { + pat_vect.push_back(pat); + pat_len_vect.push_back(len); + } + pat = _mmap + i + 1; + } + } + + ASSERT(pat_vect.size() == pat_len_vect.size()); + + int pat_num = pat_vect.size(); + if (pat_num > 0) { + const char** p = _patterns = new const char*[pat_num]; + int i = 0; + for (vector<const char*>::iterator iter = pat_vect.begin(), + iter_e = pat_vect.end(); iter != iter_e; ++iter) { + p[i++] = *iter; + } + + i = 0; + unsigned int* q = _pat_len = new unsigned int[pat_num]; + for (vector<unsigned>::iterator iter = pat_len_vect.begin(), + iter_e = pat_len_vect.end(); iter != iter_e; ++iter) { + q[i++] = *iter; + } + } + + _pat_num = pat_num; + if (pat_num <= 0) { + _errmsg = "no pattern at all"; + return false; + } + + return true; +} + +/* Release the mapping, the pattern tables and the descriptor. + * Idempotent: the constructor calls it when ExtractPattern() fails and the + * destructor calls it again, so every released resource is reset here. + */ +void +PatternSet::Cleanup() { + if (_mmap != MAP_FAILED) { + munmap(_mmap, _mmap_size); + _mmap = (char*)MAP_FAILED; + _mmap_size = 0; + } + + delete[] _patterns; + _patterns = 0; + delete[] _pat_len; + _pat_len = 0; + if (_fd != -1) { + close(_fd); + _fd = -1; + } + _pat_num = -1; +} + +/* Load the patterns from the dictionary file <filepath>; on failure the + * object is left empty (getPatternNum() == -1) with getErrMessage() set. + */ +PatternSet::PatternSet(const char* filepath) { + _patterns = 0; + _pat_len = 0; + _mmap = (char*)MAP_FAILED; + _mmap_size = 0; + _fd = -1; /* ExtractPattern() may fail before open(); Cleanup() must not close garbage */ + _pat_num = -1; + _errmsg = ""; + + if (!ExtractPattern(filepath)) + Cleanup(); +} + +bool +getFilesUnderDir(vector<string>& files, const char* path) { + files.clear(); + + DIR* dir = opendir(path); + 
if (!dir) + return false; + + string path_dir = path; + path_dir += "/"; + + for (;;) { + struct dirent* entry = readdir(dir); + if (entry) { + string filepath = path_dir + entry->d_name; + struct stat file_stat; + if (stat(filepath.c_str(), &file_stat)) { + closedir(dir); + return false; + } + + if (S_ISREG(file_stat.st_mode)) + files.push_back(filepath); + + continue; + } + + if (errno) { + return false; + } + break; + } + closedir(dir); + return true; +} + +class Timer { +public: + Timer() { + my_clock_gettime(&_start); + _stop = _start; + _acc.tv_sec = 0; + _acc.tv_nsec = 0; + } + + const Timer& operator += (const Timer& that) { + time_t sec = _acc.tv_sec + that._acc.tv_sec; + long nsec = _acc.tv_nsec + that._acc.tv_nsec; + if (nsec > 1000000000) { + nsec -= 1000000000; + sec += 1; + } + _acc.tv_sec = sec; + _acc.tv_nsec = nsec; + return *this; + } + + // return duration in us + size_t getDuration() const { + return _acc.tv_sec * (size_t)1000000 + _acc.tv_nsec/1000; + } + + void Start(bool acc=true) { + my_clock_gettime(&_start); + } + + void Stop() { + my_clock_gettime(&_stop); + struct timespec t = CalcDuration(); + _acc = add_duration(_acc, t); + } + +private: + int my_clock_gettime(struct timespec* t) { +#ifdef __linux + return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t); +#else + struct timeval tv; + int rc = gettimeofday(&tv, 0); + t->tv_sec = tv.tv_sec; + t->tv_nsec = tv.tv_usec * 1000; + return rc; +#endif + } + + struct timespec add_duration(const struct timespec& dur1, + const struct timespec& dur2) { + time_t sec = dur1.tv_sec + dur2.tv_sec; + long nsec = dur1.tv_nsec + dur2.tv_nsec; + if (nsec > 1000000000) { + nsec -= 1000000000; + sec += 1; + } + timespec t; + t.tv_sec = sec; + t.tv_nsec = nsec; + + return t; + } + + struct timespec CalcDuration() const { + timespec diff; + if ((_stop.tv_nsec - _start.tv_nsec)<0) { + diff.tv_sec = _stop.tv_sec - _start.tv_sec - 1; + diff.tv_nsec = 1000000000 + _stop.tv_nsec - _start.tv_nsec; + } else { + 
diff.tv_sec = _stop.tv_sec - _start.tv_sec; + diff.tv_nsec = _stop.tv_nsec - _start.tv_nsec; + } + return diff; + } + + struct timespec _start; + struct timespec _stop; + struct timespec _acc; +}; + +class Benchmark { +public: + Benchmark(const PatternSet& pat_set, const char* infile): + _pat_set(pat_set), _infile(infile) { + _mmap = (char*)MAP_FAILED; + _file_sz = 0; + _fd = -1; + } + + ~Benchmark() { + if (_mmap != MAP_FAILED) + munmap(_mmap, _file_sz); + if (_fd != -1) + close(_fd); + } + + bool Run(int iteration); + const Timer& getTimer() const { return _timer; } + +private: + const PatternSet& _pat_set; + const char* _infile; + char* _mmap; + int _fd; + size_t _file_sz; // input file size + Timer _timer; +}; + +bool +Benchmark::Run(int iteration) { + if (_pat_set.getPatternNum() <= 0) { + SomethingWrong = true; + return false; + } + + if (_mmap == MAP_FAILED) { + struct stat filestat; + if (stat(_infile, &filestat)) { + SomethingWrong = true; + return false; + } + + if (!S_ISREG(filestat.st_mode)) { + SomethingWrong = true; + return false; + } + + _fd = open(_infile, 0); + if (_fd == -1) + return false; + + _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE, _fd, 0); + + if (_mmap == MAP_FAILED) { + SomethingWrong = true; + return false; + } + + _file_sz = filestat.st_size; + } + + ac_t* ac = ac_create(_pat_set.getPatternVector(), + _pat_set.getPatternLenVector(), + _pat_set.getPatternNum()); + if (!ac) { + SomethingWrong = true; + return false; + } + + int piece_num = _file_sz/piece_size; + + _timer.Start(false); + + /* Stupid compiler may not be able to promote piece_size into register. + * Do it manually. 
+ */ + int piece_sz = piece_size; + for (int i = 0; i < iteration; i++) { + size_t match_ofst = 0; + for (int piece_idx = 0; piece_idx < piece_num; piece_idx ++) { + ac_match2(ac, _mmap + match_ofst, piece_sz); + match_ofst += piece_sz; + } + if (match_ofst != _file_sz) + ac_match2(ac, _mmap + match_ofst, _file_sz - match_ofst); + } + _timer.Stop(); + ac_free(ac); /* release the automaton built for this run; it used to leak */ + return true; +} + +/* Command-line options accepted by getopt_long() in getOptions(). */ +const char* short_opt = "hd:f:i:p:"; +const struct option long_opts[] = { + {"help", no_argument, 0, 'h'}, + {"iteration", required_argument, 0, 'i'}, + {"dictionary-dir", required_argument, 0, 'd'}, + {"obj-file-dir", required_argument, 0, 'f'}, + {"piece-size", required_argument, 0, 'p'}, + {0, 0, 0, 0} /* getopt_long() requires an all-zero terminating element */ +}; + +static void +PrintHelp(const char* prog_name) { + const char* msg = +"Usage %s [OPTIONS]\n" +" -d, --dictionary-dir : specify the dictionary directory (./dict by default)\n" +" -f, --obj-file-dir : specify the object file directory\n" +" (./testinput by default)\n" +" -i, --iteration : Run this many iteration for each pattern match\n" +" -p, --piece-size : The size of 'piece' in byte. The input file is\n" +" divided into pieces, and match function is working\n" +" on one piece at a time. 
The default size of piece\n" +" is 1k byte.\n"; + + fprintf(stdout, msg, prog_name); +} + +static bool +getOptions(int argc, char** argv) { + bool dict_dir_set = false; + bool objfile_dir_set = false; + int opt_index; + + while (1) { + if (print_help) break; + + int c = getopt_long(argc, argv, short_opt, long_opts, &opt_index); + + if (c == -1) break; + if (c == 0) { c = long_opts[opt_index].val; } + + switch(c) { + case 'h': + print_help = true; + break; + + case 'i': + iteration = atol(optarg); + break; + + case 'd': + dict_dir = optarg; + dict_dir_set = true; + break; + + case 'f': + obj_file_dir = optarg; + objfile_dir_set = true; + break; + + case 'p': + piece_size = atol(optarg); + break; + + case '?': + default: + return false; + } + } + + if (print_help) + return true; + + string basedir(dirname(argv[0])); + if (!dict_dir_set) + dict_dir = basedir + "/dict"; + + if (!objfile_dir_set) + obj_file_dir = basedir + "/testinput"; + + return true; +} + +int +main(int argc, char** argv) { + if (!getOptions(argc, argv)) + return -1; + + if (print_help) { + PrintHelp(argv[0]); + return 0; + } + +#ifndef __linux + fprintf(stdout, "\n!!!WARNING: On this OS, the execution time is measured" + " by gettimeofday(2) which is imprecise!!!\n\n"); +#endif + + fprintf(stdout, "Test with iteration = %d, piece size = %d, and", + iteration, piece_size); + fprintf(stdout, "\n dictionary dir = %s\n object file dir = %s\n\n", + dict_dir.c_str(), obj_file_dir.c_str()); + + vector<string> dict_files; + vector<string> input_files; + + if (!getFilesUnderDir(dict_files, dict_dir.c_str())) { + fprintf(stdout, "fail to find dictionary files\n"); + return -1; + } + + if (!getFilesUnderDir(input_files, obj_file_dir.c_str())) { + fprintf(stdout, "fail to find test input files\n"); + return -1; + } + + for (vector<string>::iterator diter = dict_files.begin(), + diter_e = dict_files.end(); diter != diter_e; ++diter) { + + const char* dict_name = diter->c_str(); + if 
(!PatternSet::isDictFile(dict_name)) + continue; + + PatternSet ps(dict_name); + if (ps.getPatternNum() <= 0) { + fprintf(stdout, "fail to open dictionary file %s : %s\n", + dict_name, ps.getErrMessage()); + SomethingWrong = true; + continue; + } + + fprintf(stdout, "Using dictionary %s\n", dict_name); + Timer timer; + for (vector<string>::iterator iter = input_files.begin(), + iter_e = input_files.end(); iter != iter_e; ++iter) { + fprintf(stdout, " testing %s ... ", iter->c_str()); + fflush(stdout); + Benchmark bm(ps, iter->c_str()); + bm.Run(iteration); + const Timer& t = bm.getTimer(); + timer += bm.getTimer(); + fprintf(stdout, "elapsed %.3f\n", t.getDuration() / 1000000.0); + } + + fprintf(stdout, + "\n==========================================================\n" + " Total Elapse %.3f\n\n", timer.getDuration() / 1000000.0); + } + + return SomethingWrong ? -1 : 0; +} diff --git a/modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx b/modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx new file mode 100644 index 0000000..4ea02bc --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/ac_test_aggr.cxx @@ -0,0 +1,135 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +#include <stdio.h> +#include <string.h> +#include <vector> +#include <string> + +#include "ac.h" +#include "ac_util.hpp" +#include "test_base.hpp" + +using namespace std; + +namespace { +class ACBigFileTester : public BigFileTester { +public: + ACBigFileTester(const char* filepath) : BigFileTester(filepath){}; + +private: + virtual buf_header_t* PM_Create(const char** strv, uint32* strlenv, + uint32 vect_len) { + return (buf_header_t*)ac_create(strv, strlenv, vect_len); + } + + virtual void PM_Free(buf_header_t* PM) { ac_free(PM); } + virtual bool Run_Helper(buf_header_t* PM); +}; + +class ACTestAggressive: public ACTestBase { +public: + ACTestAggressive(const vector<const char*>& files, const char* banner) + : 
ACTestBase(banner), _files(files) {} + virtual bool Run(); + +private: + void PrintSummary(int total, int fail) { + fprintf(stdout, "Test count : %d, fail: %d\n", total, fail); + fflush(stdout); + } + vector<const char*> _files; +}; + +} // end of anonymous namespace + +bool +ACBigFileTester::Run_Helper(buf_header_t* PM) { + int fail = 0; + // advance one chunk at a time. + int len = _msg_len; + int chunk_sz = _chunk_sz; + + vector<const char*> c_style_keys; + for (int i = 0, e = _keys.size(); i != e; i++) { + const char* key = _keys[i].first; + int len = _keys[i].second; + char *t = new char[len+1]; + memcpy(t, key, len); + t[len] = '\0'; + c_style_keys.push_back(t); + } + + for (int ofst = 0, chunk_idx = 0, chunk_num = _chunk_num; + chunk_idx < chunk_num; ofst += chunk_sz, chunk_idx++) { + const char* substring = _msg + ofst; + ac_result_t r = ac_match((ac_t*)(void*)PM, substring , len - ofst); + int m_b = r.match_begin; + int m_e = r.match_end; + + if (m_b < 0 || m_e < 0 || m_e <= m_b || m_e >= len) { + fprintf(stdout, "fail to find match substring[%d:%d])\n", + ofst, len - 1); + fail ++; + continue; + } + + const char* match_str = _msg + len; + int strstr_len = 0; + int key_idx = -1; + + for (int i = 0, e = c_style_keys.size(); i != e; i++) { + const char* key = c_style_keys[i]; + if (const char *m = strstr(substring, key)) { + if (m < match_str) { + match_str = m; + strstr_len = _keys[i].second; + key_idx = i; + } + } + } + ASSERT(key_idx != -1); + if ((match_str - substring != m_b)) { + fprintf(stdout, + "Fail to find match substring[%d:%d])," + " expected to find match at offset %d instead of %d\n", + ofst, len - 1, + (int)(match_str - _msg), ofst + m_b); + fprintf(stdout, "%d vs %d (key idx %d)\n", strstr_len, m_e - m_b + 1, key_idx); + PrintStr(stdout, match_str, strstr_len); + fprintf(stdout, "\n"); + PrintStr(stdout, _msg + ofst + m_b, + m_e - m_b + 1); + fprintf(stdout, "\n"); + fail ++; + } + } + for (vector<const char*>::iterator i = 
c_style_keys.begin(), + e = c_style_keys.end(); i != e; i++) { + delete[] *i; + } + + return fail == 0; +} + +bool +ACTestAggressive::Run() { + int fail = 0; + for (vector<const char*>::iterator i = _files.begin(), e = _files.end(); + i != e; i++) { + ACBigFileTester bft(*i); + if (!bft.Run()) + fail ++; + } + return fail == 0; +} + +bool +Run_AC_Aggressive_Test(const vector<const char*>& files) { + ACTestAggressive t(files, "AC Aggressive test"); + t.PrintBanner(); + return t.Run(); +} diff --git a/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx b/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx new file mode 100644 index 0000000..fa2d7fd --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx @@ -0,0 +1,275 @@ +#include <stdio.h> +#include <string.h> +#include <vector> +#include <string> + +#include "ac.h" +#include "ac_util.hpp" +#include "test_base.hpp" + +using namespace std; + +namespace { +typedef struct { + const char* str; + const char* match; +} StrPair; + +typedef enum { + MV_FIRST_MATCH = 0, + MV_LEFT_LONGEST = 1, +} MatchVariant; + +typedef struct { + const char* name; + const char** dict; + StrPair* strpairs; + int dict_len; + int strpair_num; + MatchVariant match_variant; +} TestingCase; + +class Tests { +public: + Tests(const char* name, + const char* dict[], int dict_len, + StrPair strpairs[], int strpair_num, + MatchVariant mv = MV_FIRST_MATCH) { + if (!_tests) + _tests = new vector<TestingCase>; + + TestingCase tc; + tc.name = name; + tc.dict = dict; + tc.strpairs = strpairs; + tc.dict_len = dict_len; + tc.strpair_num = strpair_num; + tc.match_variant = mv; + _tests->push_back(tc); + } + + static vector<TestingCase>* Get_Tests() { return _tests; } + static void Erase_Tests() { delete _tests; _tests = 0; } + +private: + static vector<TestingCase> *_tests; +}; + +class LeftLongestTests : public Tests { +public: + LeftLongestTests (const char* name, const char* dict[], int dict_len, + StrPair strpairs[], int 
strpair_num): + Tests(name, dict, dict_len, strpairs, strpair_num, MV_LEFT_LONGEST) { + } +}; + +vector<TestingCase>* Tests::_tests = 0; + +class ACTestSimple: public ACTestBase { +public: + ACTestSimple(const char* banner) : ACTestBase(banner) {} + virtual bool Run(); + +private: + void PrintSummary(int total, int fail) { + fprintf(stdout, "Test count : %d, fail: %d\n", total, fail); + fflush(stdout); + } +}; +} + +bool +ACTestSimple::Run() { + int total = 0; + int fail = 0; + + vector<TestingCase> *tests = Tests::Get_Tests(); + if (!tests) { + PrintSummary(0, 0); + return true; + } + + for (vector<TestingCase>::iterator i = tests->begin(), e = tests->end(); + i != e; i++) { + TestingCase& t = *i; + int dict_len = t.dict_len; + unsigned int* strlen_v = new unsigned int[dict_len]; + + fprintf(stdout, ">Testing %s\nDictionary:[ ", t.name); + for (int i = 0, need_break=0; i < dict_len; i++) { + const char* s = t.dict[i]; + fprintf(stdout, "%s, ", s); + strlen_v[i] = strlen(s); + if (need_break++ == 16) { + fputs("\n ", stdout); + need_break = 0; + } + } + fputs("]\n", stdout); + + /* Create the dictionary */ + ac_t* ac = ac_create(t.dict, strlen_v, dict_len); + delete[] strlen_v; + + for (int ii = 0, ee = t.strpair_num; ii < ee; ii++, total++) { + const StrPair& sp = t.strpairs[ii]; + const char *str = sp.str; // the string to be matched + const char *match = sp.match; + + fprintf(stdout, "[%3d] Testing '%s' : ", total, str); + + int len = strlen(str); + ac_result_t r; + if (t.match_variant == MV_FIRST_MATCH) + r = ac_match(ac, str, len); + else if (t.match_variant == MV_LEFT_LONGEST) + r = ac_match_longest_l(ac, str, len); + else { + ASSERT(false && "Unknown variant"); + } + + int m_b = r.match_begin; + int m_e = r.match_end; + + // The return value per se is insane. 
+ if (m_b > m_e || + ((m_b < 0 || m_e < 0) && (m_b != -1 || m_e != -1))) { + fprintf(stdout, "Insane return value (%d, %d)\n", m_b, m_e); + fail ++; + continue; + } + + // If the string is not supposed to match the dictionary. + if (!match) { + if (m_b != -1 || m_e != -1) { + fail ++; + fprintf(stdout, "Not Supposed to match (%d, %d) \n", + m_b, m_e); + } else + fputs("Pass\n", stdout); + continue; + } + + // The string (or a substring of it) is expected to match the dict, + // so both ends of the reported range must lie inside the string. + if (m_b >= len || m_e >= len) { + fail ++; + fprintf(stdout, + "Return value >= the length of the string (%d, %d)\n", + m_b, m_e); + continue; + } else { + int mlen = strlen(match); + // m_b < 0 means "no match" (-1, -1); reject it before indexing + // str, otherwise a one-char expectation would read str[-1]. + if (m_b < 0 || (mlen != m_e - m_b + 1) || + strncmp(str + m_b, match, mlen)) { + fail ++; + fprintf(stdout, "Fail\n"); + } else + fprintf(stdout, "Pass\n"); + } + } + fputs("\n", stdout); + ac_free(ac); + } + + PrintSummary(total, fail); + return fail == 0; +} + +bool +Run_AC_Simple_Test() { + ACTestSimple t("AC Simple test"); + t.PrintBanner(); + return t.Run(); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Testing cases for first-match variant (i.e. 
test ac_match()) +// +////////////////////////////////////////////////////////////////////////////// +// + +/* test 1*/ +const char *dict1[] = {"he", "she", "his", "her"}; +StrPair strpair1[] = { + {"he", "he"}, {"she", "she"}, {"his", "his"}, + {"hers", "he"}, {"ahe", "he"}, {"shhe", "he"}, + {"shis2", "his"}, {"ahhe", "he"} +}; +Tests test1("test 1", + dict1, sizeof(dict1)/sizeof(dict1[0]), + strpair1, sizeof(strpair1)/sizeof(strpair1[0])); + +/* test 2*/ +const char *dict2[] = {"poto", "poto"}; /* duplicated strings*/ +StrPair strpair2[] = {{"The pot had a handle", 0}}; +Tests test2("test 2", dict2, 2, strpair2, 1); + +/* test 3*/ +const char *dict3[] = {"The"}; +StrPair strpair3[] = {{"The pot had a handle", "The"}}; +Tests test3("test 3", dict3, 1, strpair3, 1); + +/* test 4*/ +const char *dict4[] = {"pot"}; +StrPair strpair4[] = {{"The pot had a handle", "pot"}}; +Tests test4("test 4", dict4, 1, strpair4, 1); + +/* test 5*/ +const char *dict5[] = {"pot "}; +StrPair strpair5[] = {{"The pot had a handle", "pot "}}; +Tests test5("test 5", dict5, 1, strpair5, 1); + +/* test 6*/ +const char *dict6[] = {"ot h"}; +StrPair strpair6[] = {{"The pot had a handle", "ot h"}}; +Tests test6("test 6", dict6, 1, strpair6, 1); + +/* test 7*/ +const char *dict7[] = {"andle"}; +StrPair strpair7[] = {{"The pot had a handle", "andle"}}; +Tests test7("test 7", dict7, 1, strpair7, 1); + +const char *dict8[] = {"aaab"}; +StrPair strpair8[] = {{"aaaaaaab", "aaab"}}; +Tests test8("test 8", dict8, 1, strpair8, 1); + +const char *dict9[] = {"haha", "z"}; +StrPair strpair9[] = {{"aaaaz", "z"}, {"z", "z"}}; +Tests test9("test 9", dict9, 2, strpair9, 2); + +/* test the case when input string dosen't contain even a single char + * of the pattern in dictionary. 
+ */ +const char *dict10[] = {"abc"}; +StrPair strpair10[] = {{"cde", 0}}; +Tests test10("test 10", dict10, 1, strpair10, 1); + + +////////////////////////////////////////////////////////////////////////////// +// +// Testing cases for first longest match variant (i.e. +// test ac_match_longest_l()) +// +////////////////////////////////////////////////////////////////////////////// +// + +// This was actually first motivation for left-longest-match +const char *dict100[] = {"Mozilla", "Mozilla Mobile"}; +StrPair strpair100[] = {{"User Agent containing string Mozilla Mobile", "Mozilla Mobile"}}; +LeftLongestTests test100("l_test 100", dict100, 2, strpair100, 1); + +// Dict with single char is tricky +const char *dict101[] = {"a", "abc"}; +StrPair strpair101[] = {{"abcdef", "abc"}}; +LeftLongestTests test101("l_test 101", dict101, 2, strpair101, 1); + +// Testing case with partially overlapping patterns. The purpose is to +// check if the fail-link leading from terminal state is correct. +// +// The fail-link leading from terminal-state does not matter in +// match-first-occurrence variant, as it stop when a terminal is hit. +// +const char *dict102[] = {"abc", "bcdef"}; +StrPair strpair102[] = {{"abcdef", "bcdef"}}; +LeftLongestTests test102("l_test 102", dict102, 2, strpair102, 1); diff --git a/modules/policy/lua-aho-corasick/tests/dict/README.txt b/modules/policy/lua-aho-corasick/tests/dict/README.txt new file mode 100644 index 0000000..cd50b41 --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/dict/README.txt @@ -0,0 +1 @@ +This directory contains pattern set of benchmark purpose. 
diff --git a/modules/policy/lua-aho-corasick/tests/dict/dict1.txt b/modules/policy/lua-aho-corasick/tests/dict/dict1.txt new file mode 100644 index 0000000..94085a9 --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/dict/dict1.txt @@ -0,0 +1,11 @@ +false_return@ +forloop#haha +wtfprogram +mmaporunmap +ThIs?Module!IsEssential +struct rtlwtf +gettIMEOfdayWrong +edistribution_and_use_in_@source +Copyright~#@ +while {! +!%SQLinje diff --git a/modules/policy/lua-aho-corasick/tests/load_ac_test.lua b/modules/policy/lua-aho-corasick/tests/load_ac_test.lua new file mode 100644 index 0000000..7fb7db9 --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/load_ac_test.lua @@ -0,0 +1,82 @@ +-- This script is to test load_ac.lua +-- +-- Some notes: +-- 1. The purpose of this script is not to check if the libac.so work +-- properly, it is to check if there are something stupid in load_ac.lua +-- +-- 2. There are bunch of collectgarbage() calls, the purpose is to make +-- sure the shared lib is not unloaded after GC. + +-- load_ac.lua looks up libac.so via package.cpath rather than LD_LIBRARY_PATH, +-- prepend (instead of appending) some insane paths here to see if it quit +-- prematurely. +-- +package.cpath = ".;./?.so;" .. 
package.cpath + +local ac = require "load_ac" + +local ac_create = ac.create_ac +local ac_match = ac.match +local string_fmt = string.format +local string_sub = string.sub + +local err_cnt = 0 +local function mytest(testname, dict, match, notmatch) + print(">Testing ", testname) + + io.write(string_fmt("Dictionary: ")); + for i=1, #dict do + io.write(string_fmt("%s, ", dict[i])) + end + print "" + + local ac_inst = ac_create(dict); + collectgarbage() + for i=1, #match do + local str = match[i] + io.write(string_fmt("Matching %s, ", str)) + local b = ac_match(ac_inst, str) + if b then + print "pass" + else + err_cnt = err_cnt + 1 + print "fail" + end + collectgarbage() + end + + if notmatch == nil then + return + end + + collectgarbage() + + for i = 1, #notmatch do + local str = notmatch[i] + io.write(string_fmt("*Matching %s, ", str)) + local r = ac_match(ac_inst, str) + if r then + err_cnt = err_cnt + 1 + print("fail") + else + print("succ") + end + collectgarbage() + end + ac_inst = nil + collectgarbage() +end + +print("") +print("====== Test to see if load_ac.lua works properly ========") + +mytest("test1", + {"he", "she", "his", "her", "str\0ing"}, + -- matching cases + { "he", "she", "his", "hers", "ahe", "shhe", "shis2", "ahhe", "str\0ing" }, + + -- not matching case + {"str\0", "str"} + ) + +os.exit((err_cnt == 0) and 0 or 1) diff --git a/modules/policy/lua-aho-corasick/tests/lua_test.lua b/modules/policy/lua-aho-corasick/tests/lua_test.lua new file mode 100644 index 0000000..cfe178f --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/lua_test.lua @@ -0,0 +1,67 @@ +-- This script is to test ahocorasick.so not libac.so +-- +local ac = require "ahocorasick" + +local ac_create = ac.create +local ac_match = ac.match +local string_fmt = string.format +local string_sub = string.sub + +local err_cnt = 0 +local function mytest(testname, dict, match, notmatch) + print(">Testing ", testname) + + io.write(string_fmt("Dictionary: ")); + for i=1, #dict do + 
io.write(string_fmt("%s, ", dict[i])) + end + print "" + + local ac_inst = ac_create(dict); + for i=1, #match do + local str = match[i][1] + local substr = match[i][2] + io.write(string_fmt("Matching %s, ", str)) + local b, e = ac_match(ac_inst, str) + if b and e and (string_sub(str, b+1, e+1) == substr) then + print "pass" + else + err_cnt = err_cnt + 1 + print "fail" + end + --print("gc is called") + collectgarbage() + end + + if notmatch == nil then + return + end + + for i = 1, #notmatch do + local str = notmatch[i] + io.write(string_fmt("*Matching %s, ", str)) + local r = ac_match(ac_inst, str) + if r then + err_cnt = err_cnt + 1 + print("fail") + else + print("succ") + end + collectgarbage() + end +end + +mytest("test1", + {"he", "she", "his", "her", "str\0ing"}, + -- matching cases + { {"he", "he"}, {"she", "she"}, {"his", "his"}, {"hers", "he"}, + {"ahe", "he"}, {"shhe", "he"}, {"shis2", "his"}, {"ahhe", "he"}, + {"str\0ing", "str\0ing"} + }, + + -- not matching case + {"str\0", "str"} + + ) + +os.exit((err_cnt == 0) and 0 or 1) diff --git a/modules/policy/lua-aho-corasick/tests/test_base.hpp b/modules/policy/lua-aho-corasick/tests/test_base.hpp new file mode 100644 index 0000000..7758371 --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/test_base.hpp @@ -0,0 +1,60 @@ +#ifndef TEST_BASE_H +#define TEST_BASE_H + +#include <stdio.h> +#include <string> +#include <stdint.h> + +using namespace std; +class ACTestBase { +public: + ACTestBase(const char* name) :_banner(name) {} + virtual void PrintBanner() { + fprintf(stdout, "\n===== %s ====\n", _banner.c_str()); + } + + virtual bool Run() = 0; +private: + string _banner; +}; + +typedef std::pair<const char*, int> StrInfo; +class BigFileTester { +public: + BigFileTester(const char* filepath); + virtual ~BigFileTester() { Cleanup(); } + + bool Run(); + +protected: + virtual buf_header_t* PM_Create(const char** strv, uint32_t* strlenv, + uint32_t vect_len) = 0; + virtual void PM_Free(buf_header_t*) = 0; + 
virtual bool Run_Helper(buf_header_t* PM) = 0; + + // Return true if the '\0' is valid char of a string. + virtual bool Str_C_Style() { return true; } + + bool GenerateKeys(); + void Cleanup(); + void PrintStr(FILE*, const char* str, int len); + +protected: + const char* _filepath; + int _fd; + vector<StrInfo> _keys; + char* _msg; + int _msg_len; + int _key_num; // number of strings in dictionary + int _chunk_sz; + int _chunk_num; + + int _max_key_num; + int _key_min_len; + int _key_max_len; +}; + +extern bool Run_AC_Simple_Test(); +extern bool Run_AC_Aggressive_Test(const vector<const char*>& files); + +#endif diff --git a/modules/policy/lua-aho-corasick/tests/test_bigfile.cxx b/modules/policy/lua-aho-corasick/tests/test_bigfile.cxx new file mode 100644 index 0000000..f189d8d --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/test_bigfile.cxx @@ -0,0 +1,167 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +#include <stdio.h> +#include <string.h> +#include <vector> +#include <string> + +#include "ac.h" +#include "ac_util.hpp" +#include "test_base.hpp" + +/////////////////////////////////////////////////////////////////////////// +// +// Implementation of BigFileTester +// +/////////////////////////////////////////////////////////////////////////// +// +BigFileTester::BigFileTester(const char* filepath) { + _filepath = filepath; + _fd = -1; + _msg = (char*)MAP_FAILED; + _msg_len = 0; + _key_num = 0; + _chunk_sz = 0; + _chunk_num = 0; + + _max_key_num = 100; + _key_min_len = 20; + _key_max_len = 80; +} + +void +BigFileTester::Cleanup() { + if (_msg != MAP_FAILED) { + munmap((void*)_msg, _msg_len); + _msg = (char*)MAP_FAILED; + _msg_len = 0; + } + + if (_fd != -1) { + close(_fd); + _fd = -1; + } +} + +bool +BigFileTester::GenerateKeys() { + int chunk_sz = 4096; + int max_key_num = _max_key_num; + int key_min_len = _key_min_len; + int key_max_len = _key_max_len; + + int t = _msg_len / chunk_sz; + 
int keynum = t > max_key_num ? max_key_num : t; + + if (keynum <= 4) { + // file is too small + return false; + } + chunk_sz = _msg_len / keynum; + _chunk_sz = chunk_sz; + + // For each chunck, "randomly" grab a sub-string searving + // as key. + int random_ofst[] = { 12, 30, 23, 15 }; + int rofstsz = sizeof(random_ofst)/sizeof(random_ofst[0]); + int ofst = 0; + const char* msg = _msg; + _chunk_num = keynum - 1; + for (int idx = 0, e = _chunk_num; idx < e; idx++) { + const char* key = msg + ofst + idx % rofstsz; + int key_len = key_min_len + idx % (key_max_len - key_min_len); + _keys.push_back(StrInfo(key, key_len)); + ofst += chunk_sz; + } + return true; +} + +bool +BigFileTester::Run() { + // Step 1: Bring the file into memory + fprintf(stdout, "Testing using file '%s'...\n", _filepath); + + int fd = _fd = ::open(_filepath, O_RDONLY); + if (fd == -1) { + perror("open"); + return false; + } + + struct stat sb; + if (fstat(fd, &sb) == -1) { + perror("fstat"); + return false; + } + + if (!S_ISREG (sb.st_mode)) { + fprintf(stderr, "%s is not regular file\n", _filepath); + return false; + } + + int ten_M = 1024 * 1024 * 10; + int map_sz = _msg_len = sb.st_size > ten_M ? ten_M : sb.st_size; + char* p = _msg = + (char*)mmap (0, map_sz, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + if (p == MAP_FAILED) { + perror("mmap"); + return false; + } + + // Get rid of '\0' if we are picky at it. + if (Str_C_Style()) { + for (int i = 0; i < map_sz; i++) { if (!p[i]) p[i] = 'a'; } + p[map_sz - 1] = 0; + } + + // Step 2: "Fabricate" some keys from the file. 
+ if (!GenerateKeys()) { + close(fd); + return false; + } + + // Step 3: Create PM instance + const char** keys = new const char*[_keys.size()]; + unsigned int* keylens = new unsigned int[_keys.size()]; + + int i = 0; + for (vector<StrInfo>::iterator si = _keys.begin(), se = _keys.end(); + si != se; si++, i++) { + const StrInfo& strinfo = *si; + keys[i] = strinfo.first; + keylens[i] = strinfo.second; + } + + buf_header_t* PM = PM_Create(keys, keylens, i); + delete[] keys; + delete[] keylens; + + // Step 4: Run testing + bool res = Run_Helper(PM); + PM_Free(PM); + + // Step 5: Clanup + munmap(p, map_sz); + _msg = (char*)MAP_FAILED; + close(fd); + _fd = -1; + + fprintf(stdout, "%s\n", res ? "succ" : "fail"); + return res; +} + +void +BigFileTester::PrintStr(FILE* f, const char* str, int len) { + fprintf(f, "{"); + for (int i = 0; i < len; i++) { + unsigned char c = str[i]; + if (isprint(c)) + fprintf(f, "'%c', ", c); + else + fprintf(f, "%#x, ", c); + } + fprintf(f, "}"); +}; diff --git a/modules/policy/lua-aho-corasick/tests/test_main.cxx b/modules/policy/lua-aho-corasick/tests/test_main.cxx new file mode 100644 index 0000000..b4f5225 --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/test_main.cxx @@ -0,0 +1,33 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +#include <stdio.h> +#include <string.h> +#include <vector> +#include <string> +#include "ac.h" +#include "ac_util.hpp" +#include "test_base.hpp" + +using namespace std; + + +///////////////////////////////////////////////////////////////////////// +// +// Simple (yet maybe tricky) testings +// +///////////////////////////////////////////////////////////////////////// +// +int +main (int argc, char** argv) { + bool succ = Run_AC_Simple_Test(); + + vector<const char*> files; + for (int i = 1; i < argc; i++) { files.push_back(argv[i]); } + succ = Run_AC_Aggressive_Test(files) && succ; + + return succ ? 0 : -1; +}; |