From 830407e88f9d40d954356c3754f2647f91d5c06a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 17:26:00 +0200 Subject: Adding upstream version 5.6.0. Signed-off-by: Daniel Baumann --- modules/policy/lua-aho-corasick/tests/ac_bench.cxx | 519 +++++++++++++++++++++ 1 file changed, 519 insertions(+) create mode 100644 modules/policy/lua-aho-corasick/tests/ac_bench.cxx (limited to 'modules/policy/lua-aho-corasick/tests/ac_bench.cxx') diff --git a/modules/policy/lua-aho-corasick/tests/ac_bench.cxx b/modules/policy/lua-aho-corasick/tests/ac_bench.cxx new file mode 100644 index 0000000..421322c --- /dev/null +++ b/modules/policy/lua-aho-corasick/tests/ac_bench.cxx @@ -0,0 +1,519 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "ac.h" +#include "ac_util.hpp" + +using namespace std; + +static bool SomethingWrong = false; + +static int iteration = 300; +static string dict_dir; +static string obj_file_dir; +static bool print_help = false; +static int piece_size = 1024; + +class PatternSet { +public: + PatternSet(const char* filepath); + ~PatternSet() { Cleanup(); } + + int getPatternNum() const { return _pat_num; } + const char** getPatternVector() const { return _patterns; } + unsigned int* getPatternLenVector() const { return _pat_len; } + + const char* getErrMessage() const { return _errmsg; } + static bool isDictFile(const char* filepath) { + if (strncmp(basename(const_cast(filepath)), "dict", 4)) + return false; + return true; + } + +private: + bool ExtractPattern(const char* filepath); + void Cleanup(); + + const char** _patterns; + unsigned int* _pat_len; + char* _mmap; + int _fd; + size_t _mmap_size; + int _pat_num; + + const char* _errmsg; +}; + +bool +PatternSet::ExtractPattern(const char* filepath) { + if (!isDictFile(filepath)) + return false; + + struct stat filestat; + if (stat(filepath, &filestat)) { + _errmsg = "fail to call stat()"; + return false; + } + + if (filestat.st_size > 4096 * 1024) { + /* It dosen't seem to be a dictionary file*/ + _errmsg = "file too big?"; + return false; + } + + _fd = open(filepath, 0); + if (_fd == -1) { + _errmsg = "fail to open dictionary file"; + return false; + } + + _mmap_size = filestat.st_size; + _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE, _fd, 0); + if (_mmap == MAP_FAILED) { + _errmsg = "fail to call mmap"; + return false; + } + + const char* pat = _mmap; + vector pat_vect; + vector pat_len_vect; + + for (size_t i = 0, e = filestat.st_size; i < e; i++) { + if (_mmap[i] == '\r' || _mmap[i] == '\n') { + _mmap[i] = '\0'; + int len = _mmap + i - pat; + if (len > 0) { + pat_vect.push_back(pat); + pat_len_vect.push_back(len); + } + pat = _mmap + i + 1; + } + } + + ASSERT(pat_vect.size() == pat_len_vect.size()); + + int pat_num = pat_vect.size(); + if (pat_num > 0) { + const char** p = _patterns = new const char*[pat_num]; + int i = 0; + for (vector::iterator iter = pat_vect.begin(), + iter_e = pat_vect.end(); iter != iter_e; ++iter) { + p[i++] = *iter; + } + + i = 0; + unsigned int* q = _pat_len = new unsigned int[pat_num]; + for (vector::iterator iter = pat_len_vect.begin(), + iter_e = pat_len_vect.end(); iter != iter_e; ++iter) { + q[i++] = *iter; + } + } + + _pat_num = pat_num; + if (pat_num <= 0) { + _errmsg = "no pattern at all"; + return false; + } + + return true; +} + +void +PatternSet::Cleanup() { + if (_mmap != MAP_FAILED) { + munmap(_mmap, _mmap_size); + _mmap = (char*)MAP_FAILED; + _mmap_size = 0; + } + + delete[] _patterns; + delete[] _pat_len; + if (_fd != -1) + close(_fd); + _pat_num = -1; +} + +PatternSet::PatternSet(const char* filepath) { + _patterns = 0; + _pat_len = 0; + _mmap = (char*)MAP_FAILED; + _mmap_size = 0; + _pat_num = -1; + _errmsg = ""; + + if (!ExtractPattern(filepath)) + Cleanup(); +} + +bool +getFilesUnderDir(vector& files, const char* path) { + files.clear(); + + DIR* dir = opendir(path); + if (!dir) + return false; + + string path_dir = path; + path_dir += "/"; + + for (;;) { + struct dirent* entry = readdir(dir); + if (entry) { + string filepath = path_dir + entry->d_name; + struct stat file_stat; + if (stat(filepath.c_str(), &file_stat)) { + closedir(dir); + return false; + } + + if (S_ISREG(file_stat.st_mode)) + files.push_back(filepath); + + continue; + } + + if (errno) { + return false; + } + break; + } + closedir(dir); + return true; +} + +class Timer { +public: + Timer() { + my_clock_gettime(&_start); + _stop = _start; + _acc.tv_sec = 0; + _acc.tv_nsec = 0; + } + + const Timer& operator += (const Timer& that) { + time_t sec = _acc.tv_sec + that._acc.tv_sec; + long nsec = _acc.tv_nsec + that._acc.tv_nsec; + if (nsec > 1000000000) { + nsec -= 1000000000; + sec += 1; + } + _acc.tv_sec = sec; + _acc.tv_nsec = nsec; + return *this; + } + + // return duration in us + size_t getDuration() const { + return _acc.tv_sec * (size_t)1000000 + _acc.tv_nsec/1000; + } + + void Start(bool acc=true) { + my_clock_gettime(&_start); + } + + void Stop() { + my_clock_gettime(&_stop); + struct timespec t = CalcDuration(); + _acc = add_duration(_acc, t); + } + +private: + int my_clock_gettime(struct timespec* t) { +#ifdef __linux + return clock_gettime(CLOCK_PROCESS_CPUTIME_ID, t); +#else + struct timeval tv; + int rc = gettimeofday(&tv, 0); + t->tv_sec = tv.tv_sec; + t->tv_nsec = tv.tv_usec * 1000; + return rc; +#endif + } + + struct timespec add_duration(const struct timespec& dur1, + const struct timespec& dur2) { + time_t sec = dur1.tv_sec + dur2.tv_sec; + long nsec = dur1.tv_nsec + dur2.tv_nsec; + if (nsec > 1000000000) { + nsec -= 1000000000; + sec += 1; + } + timespec t; + t.tv_sec = sec; + t.tv_nsec = nsec; + + return t; + } + + struct timespec CalcDuration() const { + timespec diff; + if ((_stop.tv_nsec - _start.tv_nsec)<0) { + diff.tv_sec = _stop.tv_sec - _start.tv_sec - 1; + diff.tv_nsec = 1000000000 + _stop.tv_nsec - _start.tv_nsec; + } else { + diff.tv_sec = _stop.tv_sec - _start.tv_sec; + diff.tv_nsec = _stop.tv_nsec - _start.tv_nsec; + } + return diff; + } + + struct timespec _start; + struct timespec _stop; + struct timespec _acc; +}; + +class Benchmark { +public: + Benchmark(const PatternSet& pat_set, const char* infile): + _pat_set(pat_set), _infile(infile) { + _mmap = (char*)MAP_FAILED; + _file_sz = 0; + _fd = -1; + } + + ~Benchmark() { + if (_mmap != MAP_FAILED) + munmap(_mmap, _file_sz); + if (_fd != -1) + close(_fd); + } + + bool Run(int iteration); + const Timer& getTimer() const { return _timer; } + +private: + const PatternSet& _pat_set; + const char* _infile; + char* _mmap; + int _fd; + size_t _file_sz; // input file size + Timer _timer; +}; + +bool +Benchmark::Run(int iteration) { + if (_pat_set.getPatternNum() <= 0) { + SomethingWrong = true; + return false; + } + + if (_mmap == MAP_FAILED) { + struct stat filestat; + if (stat(_infile, &filestat)) { + SomethingWrong = true; + return false; + } + + if (!S_ISREG(filestat.st_mode)) { + SomethingWrong = true; + return false; + } + + _fd = open(_infile, 0); + if (_fd == -1) + return false; + + _mmap = (char*)mmap(0, filestat.st_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE, _fd, 0); + + if (_mmap == MAP_FAILED) { + SomethingWrong = true; + return false; + } + + _file_sz = filestat.st_size; + } + + ac_t* ac = ac_create(_pat_set.getPatternVector(), + _pat_set.getPatternLenVector(), + _pat_set.getPatternNum()); + if (!ac) { + SomethingWrong = true; + return false; + } + + int piece_num = _file_sz/piece_size; + + _timer.Start(false); + + /* Stupid compiler may not be able to promote piece_size into register. + * Do it manually. + */ + int piece_sz = piece_size; + for (int i = 0; i < iteration; i++) { + size_t match_ofst = 0; + for (int piece_idx = 0; piece_idx < piece_num; piece_idx ++) { + ac_match2(ac, _mmap + match_ofst, piece_sz); + match_ofst += piece_sz; + } + if (match_ofst != _file_sz) + ac_match2(ac, _mmap + match_ofst, _file_sz - match_ofst); + } + _timer.Stop(); + return true; +} + +const char* short_opt = "hd:f:i:p:"; +const struct option long_opts[] = { + {"help", no_argument, 0, 'h'}, + {"iteration", required_argument, 0, 'i'}, + {"dictionary-dir", required_argument, 0, 'd'}, + {"obj-file-dir", required_argument, 0, 'f'}, + {"piece-size", required_argument, 0, 'p'}, +}; + +static void +PrintHelp(const char* prog_name) { + const char* msg = +"Usage %s [OPTIONS]\n" +" -d, --dictionary-dir : specify the dictionary directory (./dict by default)\n" +" -f, --obj-file-dir : specify the object file directory\n" +" (./testinput by default)\n" +" -i, --iteration : Run this many iteration for each pattern match\n" +" -p, --piece-size : The size of 'piece' in byte. The input file is\n" +" divided into pieces, and match function is working\n" +" on one piece at a time. The default size of piece\n" +" is 1k byte.\n"; + + fprintf(stdout, msg, prog_name); +} + +static bool +getOptions(int argc, char** argv) { + bool dict_dir_set = false; + bool objfile_dir_set = false; + int opt_index; + + while (1) { + if (print_help) break; + + int c = getopt_long(argc, argv, short_opt, long_opts, &opt_index); + + if (c == -1) break; + if (c == 0) { c = long_opts[opt_index].val; } + + switch(c) { + case 'h': + print_help = true; + break; + + case 'i': + iteration = atol(optarg); + break; + + case 'd': + dict_dir = optarg; + dict_dir_set = true; + break; + + case 'f': + obj_file_dir = optarg; + objfile_dir_set = true; + break; + + case 'p': + piece_size = atol(optarg); + break; + + case '?': + default: + return false; + } + } + + if (print_help) + return true; + + string basedir(dirname(argv[0])); + if (!dict_dir_set) + dict_dir = basedir + "/dict"; + + if (!objfile_dir_set) + obj_file_dir = basedir + "/testinput"; + + return true; +} + +int +main(int argc, char** argv) { + if (!getOptions(argc, argv)) + return -1; + + if (print_help) { + PrintHelp(argv[0]); + return 0; + } + +#ifndef __linux + fprintf(stdout, "\n!!!WARNING: On this OS, the execution time is measured" + " by gettimeofday(2) which is imprecise!!!\n\n"); +#endif + + fprintf(stdout, "Test with iteration = %d, piece size = %d, and", + iteration, piece_size); + fprintf(stdout, "\n dictionary dir = %s\n object file dir = %s\n\n", + dict_dir.c_str(), obj_file_dir.c_str()); + + vector dict_files; + vector input_files; + + if (!getFilesUnderDir(dict_files, dict_dir.c_str())) { + fprintf(stdout, "fail to find dictionary files\n"); + return -1; + } + + if (!getFilesUnderDir(input_files, obj_file_dir.c_str())) { + fprintf(stdout, "fail to find test input files\n"); + return -1; + } + + for (vector::iterator diter = dict_files.begin(), + diter_e = dict_files.end(); diter != diter_e; ++diter) { + + const char* dict_name = diter->c_str(); + if (!PatternSet::isDictFile(dict_name)) + continue; + + PatternSet ps(dict_name); + if (ps.getPatternNum() <= 0) { + fprintf(stdout, "fail to open dictionary file %s : %s\n", + dict_name, ps.getErrMessage()); + SomethingWrong = true; + continue; + } + + fprintf(stdout, "Using dictionary %s\n", dict_name); + Timer timer; + for (vector::iterator iter = input_files.begin(), + iter_e = input_files.end(); iter != iter_e; ++iter) { + fprintf(stdout, " testing %s ... ", iter->c_str()); + fflush(stdout); + Benchmark bm(ps, iter->c_str()); + bm.Run(iteration); + const Timer& t = bm.getTimer(); + timer += bm.getTimer(); + fprintf(stdout, "elapsed %.3f\n", t.getDuration() / 1000000.0); + } + + fprintf(stdout, + "\n==========================================================\n" + " Total Elapse %.3f\n\n", timer.getDuration() / 1000000.0); + } + + return SomethingWrong ? -1 : 0; +} -- cgit v1.2.3