diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
commit | 830407e88f9d40d954356c3754f2647f91d5c06a (patch) | |
tree | d6a0ece6feea91f3c656166dbaa884ef8a29740e /modules/policy/lua-aho-corasick/ac_fast.hpp | |
parent | Initial commit. (diff) | |
download | knot-resolver-upstream.tar.xz knot-resolver-upstream.zip |
Adding upstream version 5.6.0.upstream/5.6.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'modules/policy/lua-aho-corasick/ac_fast.hpp')
-rw-r--r-- | modules/policy/lua-aho-corasick/ac_fast.hpp | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/modules/policy/lua-aho-corasick/ac_fast.hpp b/modules/policy/lua-aho-corasick/ac_fast.hpp new file mode 100644 index 0000000..9ac557c --- /dev/null +++ b/modules/policy/lua-aho-corasick/ac_fast.hpp @@ -0,0 +1,124 @@ +#ifndef AC_FAST_H +#define AC_FAST_H + +#include <vector> +#include "ac.h" +#include "ac_slow.hpp" + +using namespace std; + +class ACS_Constructor; + +typedef uint32 AC_Ofst; +typedef uint32 State_ID; + +// The entire "fast" AC graph is converted from its "slow" version, and store +// in an consecutive trunk of memory or "buffer". Since the pointers in the +// fast AC graph are represented as offset relative to the base address of +// the buffer, this fast AC graph is position-independent, meaning cloning +// the fast graph is just to memcpy the entire buffer. +// +// The buffer is laid-out as following: +// +// 1. The buffer header. (i.e. the AC_Buffer content) +// 2. root-node's goto functions. It is represented as an array indiced by +// root-node's valid inputs, and the element is the ID of the corresponding +// transition state (aka kid). To save space, we used 8-bit to represent +// the IDs. ID of root's kids starts with 1. +// +// Root may have 255 valid inputs. In this speical case, i-th element +// stores value i -- i.e the i-th state. So, we don't need such array +// at all. On the other hand, 8-bit is insufficient to encode kids' ID. +// +// 3. An array indiced by state's id, and the element is the offset +// of correspoding state wrt the base address of the buffer. +// +// 4. the contents of states. +// +typedef struct { + buf_header_t hdr; // The header exposed to the user using this lib. +#ifdef VERIFY + ACS_Constructor* slow_impl; +#endif + uint32 buf_len; + AC_Ofst root_goto_ofst; // addr of root node's goto() function. + AC_Ofst states_ofst_ofst; // addr of state pointer vector (indiced by id) + AC_Ofst first_state_ofst; // addr of the first state in the buffer. + uint16 root_goto_num; // fan-out of root-node. + uint16 state_num; // number of states + + // Followed by the gut of the buffer: + // 1. map: root's-valid-input -> kid's id + // 2. map: state's ID -> offset of the state + // 3. states' content. +} AC_Buffer; + +// Depict the state of "fast" AC graph. +typedef struct { + // transition are sorted. For instance, state s1, has two transitions : + // goto(b) -> S_b, goto(a)->S_a. The inputs are sorted in the ascending + // order, and the target states are permuted accordingly. In this case, + // the inputs are sorted as : a, b, and the target states are permuted + // into S_a, S_b. So, S_a is the 1st kid, the ID of kids are consecutive, + // so we don't need to save all the target kids. + // + State_ID first_kid; + AC_Ofst fail_link; + short depth; // How far away from root. + unsigned short is_term; // Is terminal node. if is_term != 0, it encodes + // the value of "1 + pattern-index". + unsigned char goto_num; // The number of valid transition. + InputTy input_vect[1]; // Vector of valid input. Must be last field! +} AC_State; + +class Buf_Allocator { +public: + Buf_Allocator() : _buf(0) {} + virtual ~Buf_Allocator() { free(); } + + virtual AC_Buffer* alloc(int sz) = 0; + virtual void free() {}; +protected: + AC_Buffer* _buf; +}; + +// Convert slow-AC-graph into fast one. +class AC_Converter { +public: + AC_Converter(ACS_Constructor& acs, Buf_Allocator& ba) : + _acs(acs), _buf_alloc(ba) {} + AC_Buffer* Convert(); + +private: + // Return the size in byte needed to to save the specified state. + uint32 Calc_State_Sz(const ACS_State *) const; + + // In fast-AC-graph, the ID is bit trikcy. Given a state of slow-graph, + // this function is to return the ID of its counterpart in the fast-graph. + State_ID Get_Renumbered_Id(const ACS_State *s) const { + const vector<uint32> &m = _id_map; + return m[s->Get_ID()]; + } + + AC_Buffer* Alloc_Buffer(); + void Populate_Root_Goto_Func(AC_Buffer *, GotoVect&); + +#ifdef DEBUG + void dump_buffer(AC_Buffer*, FILE*); +#endif + +private: + ACS_Constructor& _acs; + Buf_Allocator& _buf_alloc; + + // map: ID of state in slow-graph -> ID of counterpart in fast-graph. + vector<uint32> _id_map; + + // map: ID of state in slow-graph -> offset of counterpart in fast-graph. + vector<AC_Ofst> _ofst_map; +}; + +ac_result_t Match(AC_Buffer* buf, const char* str, uint32 len); +ac_result_t Match_Longest_L(AC_Buffer* buf, const char* str, uint32 len); + +#endif // AC_FAST_H |