summaryrefslogtreecommitdiffstats
path: root/contrib/snowball/compiler/header.h
blob: dadbee36cae114f97dbdd2cb8caec8b0bdf0c726 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
#include <stdio.h>

#define SNOWBALL_VERSION "2.0.0"

typedef unsigned char byte;
typedef unsigned short symbol;

#define true 1
#define false 0

#define MALLOC check_malloc
#define FREE check_free

#define NEW(type, p) struct type * p = (struct type *) MALLOC(sizeof(struct type))
#define NEWVEC(type, p, n) struct type * p = (struct type *) MALLOC(sizeof(struct type) * (n))

#define SIZE(p)     ((int *)(p))[-1]
#define CAPACITY(p) ((int *)(p))[-2]

extern symbol * create_b(int n);
extern void report_b(FILE * out, const symbol * p);
extern void lose_b(symbol * p);
extern symbol * increase_capacity(symbol * p, int n);
extern symbol * move_to_b(symbol * p, int n, const symbol * q);
extern symbol * add_to_b(symbol * p, int n, const symbol * q);
extern symbol * copy_b(const symbol * p);
extern char * b_to_s(const symbol * p);
extern symbol * add_s_to_b(symbol * p, const char * s);

#define MOVE_TO_B(B, LIT) \
    move_to_b(B, sizeof(LIT) / sizeof(LIT[0]), LIT)

struct str; /* defined in space.c */

extern struct str * str_new(void);
extern void str_delete(struct str * str);
extern void str_append(struct str * str, const struct str * add);
extern void str_append_ch(struct str * str, char add);
extern void str_append_b(struct str * str, const symbol * q);
extern void str_append_b_tail(struct str * str, const symbol * q, int skip);
extern void str_append_string(struct str * str, const char * s);
extern void str_append_int(struct str * str, int i);
extern void str_clear(struct str * str);
extern void str_assign(struct str * str, const char * s);
extern struct str * str_copy(const struct str * old);
extern symbol * str_data(const struct str * str);
extern int str_len(const struct str * str);
extern int str_back(const struct str *str);
extern int get_utf8(const symbol * p, int * slot);
extern int put_utf8(int ch, symbol * p);
extern void output_str(FILE * outfile, struct str * str);

typedef enum { ENC_SINGLEBYTE, ENC_UTF8, ENC_WIDECHARS } enc;

struct m_pair {

    struct m_pair * next;
    symbol * name;
    symbol * value;

};

/* struct input must be a prefix of struct tokeniser. */
struct input {

    struct input * next;
    symbol * p;
    int c;
    char * file;
    int file_needs_freeing;
    int line_number;

};

struct include {

    struct include * next;
    symbol * b;

};

enum token_codes {

#include "syswords2.h"

    c_mathassign,
    c_name,
    c_number,
    c_literalstring,
    c_neg,
    c_call,
    c_grouping,
    c_booltest,

    NUM_TOKEN_CODES
};

enum uplus_modes {
    UPLUS_NONE,
    UPLUS_DEFINED,
    UPLUS_UNICODE
};

/* struct input must be a prefix of struct tokeniser. */
struct tokeniser {

    struct input * next;
    symbol * p;
    int c;
    char * file;
    int file_needs_freeing;
    int line_number;
    symbol * b;
    symbol * b2;
    int number;
    int m_start;
    int m_end;
    struct m_pair * m_pairs;
    int get_depth;
    int error_count;
    int token;
    int previous_token;
    byte token_held;
    enc encoding;

    int omission;
    struct include * includes;

    /* Mode in which U+ has been used:
     * UPLUS_NONE - not used yet
     * UPLUS_DEFINED - stringdef U+xxxx ....
     * UPLUS_UNICODE - {U+xxxx} used with implicit meaning
     */
    int uplusmode;

    char token_disabled[NUM_TOKEN_CODES];
};

extern symbol * get_input(const char * filename);
extern struct tokeniser * create_tokeniser(symbol * b, char * file);
extern int read_token(struct tokeniser * t);
extern const char * name_of_token(int code);
extern void disable_token(struct tokeniser * t, int code);
extern void close_tokeniser(struct tokeniser * t);

extern int space_count;
extern void * check_malloc(int n);
extern void check_free(void * p);

struct node;

struct name {

    struct name * next;
    symbol * b;
    int type;                   /* t_string etc */
    int mode;                   /*    )_  for routines, externals */
    struct node * definition;   /*    )                           */
    int count;                  /* 0, 1, 2 for each type */
    struct grouping * grouping; /* for grouping names */
    byte referenced;
    byte used_in_among;         /* Function used in among? */
    byte value_used;            /* (For variables) is its value ever used? */
    byte initialised;           /* (For variables) is it ever initialised? */
    byte used_in_definition;    /* (grouping) used in grouping definition? */
    struct node * used;         /* First use, or NULL if not used */
    struct name * local_to;     /* Local to one routine/external */
    int declaration_line_number;/* Line number of declaration */

};

struct literalstring {

    struct literalstring * next;
    symbol * b;

};

struct amongvec {

    symbol * b;      /* the string giving the case */
    int size;        /* - and its size */
    struct node * action; /* the corresponding action */
    int i;           /* the amongvec index of the longest substring of b */
    int result;      /* the numeric result for the case */
    int line_number; /* for diagnostics and stable sorting */
    struct name * function;

};

struct among {

    struct among * next;
    struct amongvec * b;      /* pointer to the amongvec */
    int number;               /* amongs are numbered 0, 1, 2 ... */
    int literalstring_count;  /* in this among */
    int command_count;        /* in this among */
    int nocommand_count;      /* number of "no command" entries in this among */
    int function_count;       /* in this among */
    int amongvar_needed;      /* do we need to set among_var? */
    struct node * starter;    /* i.e. among( (starter) 'string' ... ) */
    struct node * substring;  /* i.e. substring ... among ( ... ) */
    struct node ** commands;  /* array with command_count entries */
};

struct grouping {

    struct grouping * next;
    symbol * b;               /* the characters of this group */
    int largest_ch;           /* character with max code */
    int smallest_ch;          /* character with min code */
    struct name * name;       /* so g->name->grouping == g */
    int line_number;
};

struct node {

    struct node * next;
    struct node * left;
    struct node * aux;     /* used in setlimit */
    struct among * among;  /* used in among */
    struct node * right;
    int type;
    int mode;
    struct node * AE;
    struct name * name;
    symbol * literalstring;
    int number;
    int line_number;
    int amongvar_needed;   /* used in routine definitions */
};

enum name_types {

    t_size = 6,

    t_string = 0, t_boolean = 1, t_integer = 2, t_routine = 3, t_external = 4,
    t_grouping = 5

/*  If this list is extended, adjust wvn in generator.c  */
};

/*  In name_count[i] below, remember that
    type   is
    ----+----
      0 |  string
      1 |  boolean
      2 |  integer
      3 |  routine
      4 |  external
      5 |  grouping
*/

struct analyser {

    struct tokeniser * tokeniser;
    struct node * nodes;
    struct name * names;
    struct literalstring * literalstrings;
    int mode;
    byte modifyable;          /* false inside reverse(...) */
    struct node * program;
    struct node * program_end;
    int name_count[t_size];   /* name_count[i] counts the number of names of type i */
    struct among * amongs;
    struct among * amongs_end;
    int among_count;
    int amongvar_needed;      /* used in reading routine definitions */
    struct grouping * groupings;
    struct grouping * groupings_end;
    struct node * substring;  /* pending 'substring' in current routine definition */
    enc encoding;
    byte int_limits_used;     /* are maxint or minint used? */
};

enum analyser_modes {

    m_forward = 0, m_backward /*, m_integer */

};

extern void print_program(struct analyser * a);
extern struct analyser * create_analyser(struct tokeniser * t);
extern void close_analyser(struct analyser * a);

extern void read_program(struct analyser * a);

struct generator {

    struct analyser * analyser;
    struct options * options;
    int unreachable;           /* 0 if code can be reached, 1 if current code
                                * is unreachable. */
    int var_number;            /* Number of next variable to use. */
    struct str * outbuf;       /* temporary str to store output */
    struct str * declarations; /* str storing variable declarations */
    int next_label;
#ifndef DISABLE_PYTHON
    int max_label;
#endif
    int margin;

    /* if > 0, keep_count to restore in case of a failure;
     * if < 0, the negated keep_count for the limit to restore in case of
     * failure. */
    int failure_keep_count;
#if !defined(DISABLE_JAVA) && !defined(DISABLE_JS) && !defined(DISABLE_PYTHON) && !defined(DISABLE_CSHARP)
    struct str * failure_str;  /* This is used by some generators instead of failure_keep_count */
#endif

    int label_used;     /* Keep track of whether the failure label is used. */
    int failure_label;
    int debug_count;
    int copy_from_count; /* count of calls to copy_from() */

    const char * S[10];  /* strings */
    symbol * B[10];      /* blocks */
    int I[10];           /* integers */
    struct name * V[5];  /* variables */
    symbol * L[5];       /* literals, used in formatted write */

    int line_count;      /* counts number of lines output */
    int line_labelled;   /* in ISO C, will need extra ';' if it is a block end */
    int literalstring_count;
    int keep_count;      /* used to number keep/restore pairs to avoid compiler warnings
                            about shadowed variables */
};

/* Special values for failure_label in struct generator. */
enum special_labels {
    x_return = -1
};

struct options {

    /* for the command line: */

    const char * output_file;
    const char * name;
    FILE * output_src;
    FILE * output_h;
    byte syntax_tree;
    byte comments;
    enc encoding;
    enum { LANG_JAVA, LANG_C, LANG_CPLUSPLUS, LANG_CSHARP, LANG_PASCAL, LANG_PYTHON, LANG_JAVASCRIPT, LANG_RUST, LANG_GO } make_lang;
    const char * externals_prefix;
    const char * variables_prefix;
    const char * runtime_path;
    const char * parent_class_name;
    const char * package;
    const char * go_snowball_runtime;
    const char * string_class;
    const char * among_class;
    struct include * includes;
    struct include * includes_end;
};

/* Generator functions common to several backends. */

extern struct generator * create_generator(struct analyser * a, struct options * o);
extern void close_generator(struct generator * g);

extern void write_char(struct generator * g, int ch);
extern void write_newline(struct generator * g);
extern void write_string(struct generator * g, const char * s);
extern void write_int(struct generator * g, int i);
extern void write_b(struct generator * g, symbol * b);
extern void write_str(struct generator * g, struct str * str);

extern void write_comment_content(struct generator * g, struct node * p);
extern void write_generated_comment_content(struct generator * g);
extern void write_start_comment(struct generator * g,
                                const char * comment_start,
                                const char * comment_end);

extern int K_needed(struct generator * g, struct node * p);
extern int repeat_restore(struct generator * g, struct node * p);

/* Generator for C code. */
extern void generate_program_c(struct generator * g);

#ifndef DISABLE_JAVA
/* Generator for Java code. */
extern void generate_program_java(struct generator * g);
#endif

#ifndef DISABLE_CSHARP
/* Generator for C# code. */
extern void generate_program_csharp(struct generator * g);
#endif

#ifndef DISABLE_PASCAL
extern void generate_program_pascal(struct generator * g);
#endif

#ifndef DISABLE_PYTHON
/* Generator for Python code. */
extern void generate_program_python(struct generator * g);
#endif

#ifndef DISABLE_JS
extern void generate_program_js(struct generator * g);
#endif

#ifndef DISABLE_RUST
extern void generate_program_rust(struct generator * g);
#endif

#ifndef DISABLE_GO
extern void generate_program_go(struct generator * g);
#endif