/* Private definitions shared by FTS tokenizer implementations. */
#ifndef FTS_TOKENIZER_PRIVATE_H
#define FTS_TOKENIZER_PRIVATE_H
#include "fts-tokenizer.h"
/* Number of tokenizer classes.
   NOTE(review): presumably the count of built-in classes (used for sizing
   the class registry) - confirm against the registration code. */
#define FTS_TOKENIZER_CLASSES_NR 2
/* Table of function pointers that a tokenizer class provides. Every
   struct fts_tokenizer points to one of these via its "v" member. */
struct fts_tokenizer_vfuncs {
	/* Takes a list of setting strings and has out-parameters for the new
	   tokenizer and an error string.
	   NOTE(review): presumably returns 0 on success and -1 on failure
	   with *error_r set - confirm against the implementations. */
	int (*create)(const char *const *settings,
		      struct fts_tokenizer **tokenizer_r,
		      const char **error_r);

	/* Called with the tokenizer to tear down. */
	void (*destroy)(struct fts_tokenizer *tok);

	/* Called with the tokenizer whose state is to be reset. */
	void (*reset)(struct fts_tokenizer *tok);

	/* Takes "size" bytes of input at "data" and has out-parameters for
	   the number of bytes to skip, the token and an error string.
	   NOTE(review): the meaning of the int return value is not visible
	   in this header - confirm against the callers. */
	int (*next)(struct fts_tokenizer *tok,
		    const unsigned char *data,
		    size_t size,
		    size_t *skip_r,
		    const char **token_r,
		    const char **error_r);
};
/* State stored in struct fts_tokenizer.parent_state.
   NOTE(review): the per-state meanings below are inferred from the
   enumerator names - confirm against the tokenizer implementation. */
enum fts_tokenizer_parent_state {
	/* Presumably: input is being handed to the parent. */
	FTS_TOKENIZER_PARENT_STATE_ADD_DATA    = 0,
	/* Presumably: the parent is being asked for its next token. */
	FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT = 1,
	/* Presumably: the parent is being finalized. */
	FTS_TOKENIZER_PARENT_STATE_FINALIZE    = 2
};
/* A tokenizer: used both as the class descriptor passed to
   fts_tokenizer_register()/fts_tokenizer_unregister() and, judging by the
   per-instance fields below, as the runtime object (TODO confirm). */
struct fts_tokenizer {
	/* Name of the tokenizer. */
	const char *name;
	/* Function table supplied by the tokenizer class. */
	const struct fts_tokenizer_vfuncs *v;
	/* Reference count. */
	int refcount;

	/* Parent tokenizer and the bookkeeping for passing data to it.
	   NOTE(review): presumably parent is NULL when there is no parent -
	   confirm. */
	struct fts_tokenizer *parent;
	buffer_t *parent_input;
	enum fts_tokenizer_parent_state parent_state;

	/* NOTE(review): these look like the arguments and progress of the
	   previous call, remembered across calls - confirm against the
	   implementation. */
	const unsigned char *prev_data;
	size_t prev_size;
	size_t prev_skip;
	bool prev_reply_finished;

	/* When set, tokens are returned unchanged and are not passed on to
	   the parent tokenizers. */
	bool skip_parents;
	/* When set, the tokens this (child) tokenizer returns are fed to the
	   parent as one continuous stream rather than one token at a time.
	   In that mode the last token is held back until the child
	   tokenizer has also been given 0-sized data. */
	bool stream_to_parents;
	/* Only meaningful when stream_to_parents=TRUE: the parent stream has
	   not been finalized yet, and doing so will return any final
	   pending tokens. */
	bool finalize_parent_pending;
};
/* Register / unregister a tokenizer class. The class is described by a
   const struct fts_tokenizer.
   NOTE(review): the implementations are not visible in this header;
   presumably the pointer itself is stored, so tok_class would have to
   outlive the registration - confirm. */
void fts_tokenizer_register(const struct fts_tokenizer *tok_class);
void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class);
#endif
/* end of FTS_TOKENIZER_PRIVATE_H include guard */