diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
commit | f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch) | |
tree | a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/lib-fts/fts-tokenizer-generic-private.h | |
parent | Initial commit. (diff) | |
download | dovecot-upstream.tar.xz dovecot-upstream.zip |
Adding upstream version 1:2.3.19.1+dfsg1. (refs: upstream/1%2.3.19.1+dfsg1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/lib-fts/fts-tokenizer-generic-private.h')
-rw-r--r-- | src/lib-fts/fts-tokenizer-generic-private.h | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/src/lib-fts/fts-tokenizer-generic-private.h b/src/lib-fts/fts-tokenizer-generic-private.h new file mode 100644 index 0000000..87f4d48 --- /dev/null +++ b/src/lib-fts/fts-tokenizer-generic-private.h @@ -0,0 +1,57 @@ +#ifndef FTS_TOKENIZER_GENERIC_PRIVATE_H +#define FTS_TOKENIZER_GENERIC_PRIVATE_H + +extern const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_simple; +extern const struct fts_tokenizer_vfuncs generic_tokenizer_vfuncs_tr29; + +/* Word boundary letter type */ +enum letter_type { + LETTER_TYPE_NONE = 0, + LETTER_TYPE_CR, + LETTER_TYPE_LF, + LETTER_TYPE_NEWLINE, + LETTER_TYPE_EXTEND, + LETTER_TYPE_REGIONAL_INDICATOR, + LETTER_TYPE_FORMAT, + LETTER_TYPE_KATAKANA, + LETTER_TYPE_HEBREW_LETTER, + LETTER_TYPE_ALETTER, + LETTER_TYPE_SINGLE_QUOTE, + LETTER_TYPE_DOUBLE_QUOTE, + LETTER_TYPE_MIDNUMLET, + LETTER_TYPE_MIDLETTER, + LETTER_TYPE_MIDNUM, + LETTER_TYPE_NUMERIC, + LETTER_TYPE_EXTENDNUMLET, + LETTER_TYPE_SOT, + LETTER_TYPE_EOT, + LETTER_TYPE_APOSTROPHE, /* Own modification to TR29 */ + LETTER_TYPE_PREFIXSPLAT, /* Dovecot '*' for glob-like explicit prefix searching */ + LETTER_TYPE_OTHER /* WB14 "any" */ +}; + +enum boundary_algorithm { + BOUNDARY_ALGORITHM_NONE = 0, + BOUNDARY_ALGORITHM_SIMPLE, +#define ALGORITHM_SIMPLE_NAME "simple" + BOUNDARY_ALGORITHM_TR29 +#define ALGORITHM_TR29_NAME "tr29" +}; + +struct generic_fts_tokenizer { + struct fts_tokenizer tokenizer; + unsigned int max_length; + bool prefixsplat; /* for search strings, accept a trailing '*' for explicit prefix */ + bool wb5a; /* TR29 rule for prefix separation + in e.g. French or Italian. */ + bool seen_wb5a; + unichar_t prev_letter; + unichar_t letter; + enum boundary_algorithm algorithm; + enum letter_type prev_type; + enum letter_type prev_prev_type; + size_t untruncated_length; + buffer_t *token; +}; + +#endif |