summaryrefslogtreecommitdiffstats
path: root/src/lib-fts/fts-tokenizer-private.h
blob: b7615b17be364aae14fcc1b215615589c4682b9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#ifndef FTS_TOKENIZER_PRIVATE_H
#define FTS_TOKENIZER_PRIVATE_H

#include "fts-tokenizer.h"

/* NOTE(review): presumably the number of built-in tokenizer classes; the
   users of this constant are not visible in this header - confirm. */
#define FTS_TOKENIZER_CLASSES_NR 2

/* Table of function pointers supplied by a tokenizer implementation.
   struct fts_tokenizer points to one of these through its "v" member. */
struct fts_tokenizer_vfuncs {
	/* Create a tokenizer from the given settings. Judging by the
	   parameter names, the new tokenizer is handed back through
	   tokenizer_r and an error message through error_r; the meaning of
	   the int return value is not visible here - TODO confirm against
	   the callers. */
	int (*create)(const char *const *settings,
		      struct fts_tokenizer **tokenizer_r, const char **error_r);
	/* Destroy the given tokenizer (counterpart of create()). */
	void (*destroy)(struct fts_tokenizer *tok);

	/* Reset the given tokenizer. NOTE(review): presumably this drops any
	   partially tokenized input so that the tokenizer can be reused -
	   confirm against the implementations. */
	void (*reset)(struct fts_tokenizer *tok);
	/* Tokenize the "size" bytes at "data". The _r parameters are
	   outputs: skip_r receives a byte count (presumably how much of the
	   input was consumed), token_r a token and error_r an error message.
	   The meaning of the int return value is not visible in this header
	   - TODO confirm. */
	int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
		    size_t size, size_t *skip_r, const char **token_r,
		    const char **error_r);
};

/* Values of the "parent_state" member of struct fts_tokenizer. The
   per-value notes below are inferred from the names only; the code that
   drives the state changes is not in this header - TODO confirm. */
enum fts_tokenizer_parent_state {
	/* Explicitly 0, so this is the state of a zero-filled struct.
	   Presumably: input is to be added to the parent. */
	FTS_TOKENIZER_PARENT_STATE_ADD_DATA = 0,
	/* Presumably: the parent is being asked for its next token. */
	FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT,
	/* Presumably: the parent is being finalized. */
	FTS_TOKENIZER_PARENT_STATE_FINALIZE
};

/* A tokenizer. The same struct type is also what gets passed to
   fts_tokenizer_register()/fts_tokenizer_unregister() to describe a
   tokenizer class. */
struct fts_tokenizer {
	/* Name of the tokenizer. */
	const char *name;
	/* Implementation functions of this tokenizer. */
	const struct fts_tokenizer_vfuncs *v;
	/* Reference count. NOTE(review): the ref/unref functions are not
	   visible in this header. */
	int refcount;

	/* Parent tokenizer, to which this tokenizer's tokens are handed (see
	   skip_parents and stream_to_parents below). Presumably NULL when
	   there is no parent - confirm. */
	struct fts_tokenizer *parent;
	/* NOTE(review): presumably buffers the data passed on to the parent
	   - confirm against the .c file. */
	buffer_t *parent_input;
	/* Current state with respect to the parent tokenizer. */
	enum fts_tokenizer_parent_state parent_state;

	/* NOTE(review): the prev_* members look like bookkeeping about the
	   previous input buffer (its pointer, its size and how much of it was
	   skipped) and whether the reply for it was finished; their exact use
	   is not visible in this header - confirm. */
	const unsigned char *prev_data;
	size_t prev_size;
	size_t prev_skip;
	bool prev_reply_finished;
	bool skip_parents; /* Return token as is, do not hand to parents. */
	/* Instead of handing child tokens separately to parent tokenizer,
	   treat the returned tokens as a continuous stream. The final token
	   isn't returned until the child tokenizer also sees 0-sized data. */
	bool stream_to_parents;
	/* Parent stream still needs to be finalized, so any final pending
	   tokens will be returned. This is used only with
	   stream_to_parents=TRUE. */
	bool finalize_parent_pending;
};

/* Register a tokenizer class. The class is described by a const
   struct fts_tokenizer. NOTE(review): presumably the pointer itself is
   stored, so the object must stay valid until it is unregistered -
   confirm against the implementation. */
void fts_tokenizer_register(const struct fts_tokenizer *tok_class);
/* Unregister a tokenizer class (counterpart of
   fts_tokenizer_register()). */
void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class);

#endif