diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib-fts/fts-tokenizer-private.h | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/src/lib-fts/fts-tokenizer-private.h b/src/lib-fts/fts-tokenizer-private.h
new file mode 100644
index 0000000..b7615b1
--- /dev/null
+++ b/src/lib-fts/fts-tokenizer-private.h
@@ -0,0 +1,52 @@
+#ifndef FTS_TOKENIZER_PRIVATE_H /* Private tokenizer declarations: vfunc table, parent state enum, tokenizer struct, class (un)registration. */
+#define FTS_TOKENIZER_PRIVATE_H
+
+#include "fts-tokenizer.h"
+
+#define FTS_TOKENIZER_CLASSES_NR 2 /* NOTE(review): presumably the number of built-in tokenizer classes - confirm against the registry code */
+
+struct fts_tokenizer_vfuncs { /* Function-pointer table; struct fts_tokenizer points at one through its "v" member. */
+	int (*create)(const char *const *settings,
+		      struct fts_tokenizer **tokenizer_r, const char **error_r); /* takes a settings string array; reports through tokenizer_r / error_r. Return convention is not defined in this header. */
+	void (*destroy)(struct fts_tokenizer *tok); /* NOTE(review): presumably releases what create() produced - confirm */
+
+	void (*reset)(struct fts_tokenizer *tok); /* NOTE(review): presumably discards per-input state so tok can be reused - confirm */
+	int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
+		    size_t size, size_t *skip_r, const char **token_r,
+		    const char **error_r); /* takes a data/size chunk; reports through skip_r, token_r and error_r. Return-value meaning is not defined in this header. */
+};
+
+enum fts_tokenizer_parent_state { /* Type of fts_tokenizer.parent_state. */
+	FTS_TOKENIZER_PARENT_STATE_ADD_DATA = 0, /* explicitly 0, i.e. the value found in a zero-initialized struct fts_tokenizer */
+	FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT,
+	FTS_TOKENIZER_PARENT_STATE_FINALIZE
+};
+
+struct fts_tokenizer { /* Tokenizer object; fts_tokenizer_register()/unregister() below take a const pointer to one as the "class". */
+	const char *name; /* tokenizer name string */
+	const struct fts_tokenizer_vfuncs *v; /* function table, see struct fts_tokenizer_vfuncs */
+	int refcount; /* NOTE(review): presumably a reference count - confirm where it is incremented/decremented */
+
+	struct fts_tokenizer *parent; /* another tokenizer; the comments further down describe tokens being handed to parents */
+	buffer_t *parent_input; /* NOTE(review): presumably holds data queued for the parent - confirm */
+	enum fts_tokenizer_parent_state parent_state; /* one of the FTS_TOKENIZER_PARENT_STATE_* values */
+
+	const unsigned char *prev_data; /* NOTE(review): prev_data/prev_size/prev_skip look like values remembered from the previous next() call - confirm */
+	size_t prev_size;
+	size_t prev_skip;
+	bool prev_reply_finished;
+	bool skip_parents; /* Return token as is, do not hand to parents. */
+	/* Instead of handing child tokens separately to parent tokenizer,
+	   treat the returned tokens as a continuous stream. The final token
+	   isn't returned until the child tokenizer also sees 0-sized data. */
+	bool stream_to_parents;
+	/* Parent stream still needs to be finalized, so any final pending
+	   tokens will be returned. This is used only with
+	   stream_to_parents=TRUE. */
+	bool finalize_parent_pending;
+};
+
+void fts_tokenizer_register(const struct fts_tokenizer *tok_class); /* NOTE(review): presumably adds tok_class to a class registry defined elsewhere - confirm */
+void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class); /* NOTE(review): presumably the inverse of fts_tokenizer_register() - confirm */
+
+#endif
 |