diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 12:06:34 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 12:06:34 +0000 |
commit | 5e61585d76ae77fd5e9e96ebabb57afa4d74880d (patch) | |
tree | 2b467823aaeebc7ef8bc9e3cabe8074eaef1666d /src/global/tok822_parse.c | |
parent | Initial commit. (diff) | |
download | postfix-5e61585d76ae77fd5e9e96ebabb57afa4d74880d.tar.xz postfix-5e61585d76ae77fd5e9e96ebabb57afa4d74880d.zip |
Adding upstream version 3.5.24.upstream/3.5.24
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/global/tok822_parse.c')
-rw-r--r-- | src/global/tok822_parse.c | 726 |
1 files changed, 726 insertions, 0 deletions
diff --git a/src/global/tok822_parse.c b/src/global/tok822_parse.c new file mode 100644 index 0000000..4376802 --- /dev/null +++ b/src/global/tok822_parse.c @@ -0,0 +1,726 @@ +/*++ +/* NAME +/* tok822_parse 3 +/* SUMMARY +/* RFC 822 address parser +/* SYNOPSIS +/* #include <tok822.h> +/* +/* TOK822 *tok822_scan_limit(str, tailp, limit) +/* const char *str; +/* TOK822 **tailp; +/* int limit; +/* +/* TOK822 *tok822_scan(str, tailp) +/* const char *str; +/* TOK822 **tailp; +/* +/* TOK822 *tok822_parse_limit(str, limit) +/* const char *str; +/* int limit; +/* +/* TOK822 *tok822_parse(str) +/* const char *str; +/* +/* TOK822 *tok822_scan_addr(str) +/* const char *str; +/* +/* VSTRING *tok822_externalize(buffer, tree, flags) +/* VSTRING *buffer; +/* TOK822 *tree; +/* int flags; +/* +/* VSTRING *tok822_internalize(buffer, tree, flags) +/* VSTRING *buffer; +/* TOK822 *tree; +/* int flags; +/* DESCRIPTION +/* This module converts address lists between string form and parse +/* tree formats. The string form can appear in two different ways: +/* external (or quoted) form, as used in message headers, and internal +/* (unquoted) form, as used internally by the mail software. +/* Although RFC 822 expects 7-bit data, these routines pay no +/* special attention to 8-bit characters. +/* +/* tok822_scan() converts the external-form string in \fIstr\fR +/* to a linear token list. The \fItailp\fR argument is a null pointer +/* or receives the pointer value of the last result list element. +/* +/* tok822_scan_limit() implements tok822_scan(), which is a macro. +/* The \fIlimit\fR argument is either zero or an upper bound on the +/* number of tokens produced. +/* +/* tok822_parse() converts the external-form address list in +/* \fIstr\fR to the corresponding token tree. The parser is permissive +/* and will not throw away information that it does not understand. +/* The parser adds missing commas between addresses. +/* +/* tok822_parse_limit() implements tok822_parse(), which is a macro. +/* The \fIlimit\fR argument is either zero or an upper bound on the +/* number of tokens produced. +/* +/* tok822_scan_addr() converts the external-form string in +/* \fIstr\fR to an address token tree. This is just string to +/* token list conversion; no parsing is done. This routine is +/* suitable for data that should contain just one address and no +/* other information. +/* +/* tok822_externalize() converts a token list to external form. +/* Where appropriate, characters and strings are quoted and white +/* space is inserted. The \fIflags\fR argument is the binary OR of +/* zero or more of the following: +/* .IP TOK822_STR_WIPE +/* Initially, truncate the result to zero length. +/* .IP TOK822_STR_TERM +/* Append a null terminator to the result when done. +/* .IP TOK822_STR_LINE +/* Append a line break after each comma token, instead of appending +/* whitespace. It is up to the caller to concatenate short lines to +/* produce longer ones. +/* .IP TOK822_STR_TRNC +/* Truncate non-address information to 250 characters per address, to +/* protect Sendmail systems that are vulnerable to the problem in CERT +/* advisory CA-2003-07. +/* This flag has effect with tok822_externalize() only. +/* .PP +/* The macro TOK_822_NONE expresses that none of the above features +/* should be activated. +/* +/* The macro TOK822_STR_DEFL combines the TOK822_STR_WIPE and +/* TOK822_STR_TERM flags. This is useful for most token to string +/* conversions. +/* +/* The macro TOK822_STR_HEAD combines the TOK822_STR_TERM, +/* TOK822_STR_LINE and TOK822_STR_TRNC flags. This is useful for +/* the special case of token to mail header conversion. +/* +/* tok822_internalize() converts a token list to string form, +/* without quoting. White space is inserted where appropriate. +/* The \fIflags\fR argument is as with tok822_externalize(). +/* STANDARDS +/* .ad +/* .fi +/* RFC 822 (ARPA Internet Text Messages). In addition to this standard +/* this module implements additional operators such as % and !. These +/* are needed because the real world is not all RFC 822. Also, the ':' +/* operator is allowed to appear inside addresses, to accommodate DECnet. +/* In addition, 8-bit data is not given special treatment. +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/*--*/ + +/* System library. */ + +#include <sys_defs.h> +#include <ctype.h> +#include <string.h> + +/* Utility library. */ + +#include <vstring.h> +#include <msg.h> +#include <stringops.h> + +/* Global library. */ + +#include "lex_822.h" +#include "quote_822_local.h" +#include "tok822.h" + + /* + * I suppose this is my favorite macro. Used heavily for tokenizing. + */ +#define COLLECT(t,s,c,cond) { \ + while ((c = *(unsigned char *) s) != 0) { \ + if (c == '\\') { \ + if ((c = *(unsigned char *)++s) == 0) \ + break; \ + } else if (!(cond)) { \ + break; \ + } \ + VSTRING_ADDCH(t->vstr, IS_SPACE_TAB_CR_LF(c) ? ' ' : c); \ + s++; \ + } \ + VSTRING_TERMINATE(t->vstr); \ + } + +#define COLLECT_SKIP_LAST(t,s,c,cond) { COLLECT(t,s,c,cond); if (*s) s++; } + + /* + * Not quite as complex. The parser depends heavily on it. + */ +#define SKIP(tp, cond) { \ + while (tp->type && (cond)) \ + tp = tp->prev; \ + } + +#define MOVE_COMMENT_AND_CONTINUE(tp, right) { \ + TOK822 *prev = tok822_unlink(tp); \ + right = tok822_prepend(right, tp); \ + tp = prev; \ + continue; \ + } + +#define SKIP_MOVE_COMMENT(tp, cond, right) { \ + while (tp->type && (cond)) { \ + if (tp->type == TOK822_COMMENT) \ + MOVE_COMMENT_AND_CONTINUE(tp, right); \ + tp = tp->prev; \ + } \ + } + + /* + * Single-character operators. We include the % and ! operators because not + * all the world is RFC822. XXX Make this operator list configurable when we + * have a real rewriting language. Include | for aliases file parsing. + */ +static char tok822_opchar[] = "|%!" LEX_822_SPECIALS; +static void tok822_quote_atom(TOK822 *); +static const char *tok822_comment(TOK822 *, const char *); +static TOK822 *tok822_group(int, TOK822 *, TOK822 *, int); +static void tok822_copy_quoted(VSTRING *, char *, char *); +static int tok822_append_space(TOK822 *); + +#define DO_WORD (1<<0) /* finding a word is ok here */ +#define DO_GROUP (1<<1) /* doing an address group */ + +#define ADD_COMMA ',' /* resynchronize */ +#define NO_MISSING_COMMA 0 + +/* tok822_internalize - token tree to string, internal form */ + +VSTRING *tok822_internalize(VSTRING *vp, TOK822 *tree, int flags) +{ + TOK822 *tp; + + if (flags & TOK822_STR_WIPE) + VSTRING_RESET(vp); + + for (tp = tree; tp; tp = tp->next) { + switch (tp->type) { + case ',': + VSTRING_ADDCH(vp, tp->type); + if (flags & TOK822_STR_LINE) { + VSTRING_ADDCH(vp, '\n'); + continue; + } + break; + case TOK822_ADDR: + tok822_internalize(vp, tp->head, TOK822_STR_NONE); + break; + case TOK822_COMMENT: + case TOK822_ATOM: + case TOK822_QSTRING: + vstring_strcat(vp, vstring_str(tp->vstr)); + break; + case TOK822_DOMLIT: + VSTRING_ADDCH(vp, '['); + vstring_strcat(vp, vstring_str(tp->vstr)); + VSTRING_ADDCH(vp, ']'); + break; + case TOK822_STARTGRP: + VSTRING_ADDCH(vp, ':'); + break; + default: + if (tp->type >= TOK822_MINTOK) + msg_panic("tok822_internalize: unknown operator %d", tp->type); + VSTRING_ADDCH(vp, tp->type); + } + if (tok822_append_space(tp)) + VSTRING_ADDCH(vp, ' '); + } + if (flags & TOK822_STR_TERM) + VSTRING_TERMINATE(vp); + return (vp); +} + +/* strip_address - strip non-address text from address expression */ + +static void strip_address(VSTRING *vp, ssize_t start, TOK822 *addr) +{ + VSTRING *tmp; + + /* + * Emit plain <address>. Discard any comments or phrases. + */ + VSTRING_TERMINATE(vp); + msg_warn("stripping too many comments from address: %.100s...", + printable(vstring_str(vp) + start, '?')); + vstring_truncate(vp, start); + VSTRING_ADDCH(vp, '<'); + if (addr) { + tmp = vstring_alloc(100); + tok822_internalize(tmp, addr, TOK822_STR_TERM); + quote_822_local_flags(vp, vstring_str(tmp), + QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND); + vstring_free(tmp); + } + VSTRING_ADDCH(vp, '>'); +} + +/* tok822_externalize - token tree to string, external form */ + +VSTRING *tok822_externalize(VSTRING *vp, TOK822 *tree, int flags) +{ + VSTRING *tmp; + TOK822 *tp; + ssize_t start; + TOK822 *addr; + ssize_t addr_len; + + /* + * Guard against a Sendmail buffer overflow (CERT advisory CA-2003-07). + * The problem was that Sendmail could store too much non-address text + * (comments, phrases, etc.) into a static 256-byte buffer. + * + * When the buffer fills up, fixed Sendmail versions remove comments etc. + * and reduce the information to just <$g>, which expands to <address>. + * No change is made when an address expression (text separated by + * commas) contains no address. This fix reportedly also protects + * Sendmail systems that are still vulnerable to this problem. + * + * Postfix takes the same approach, grudgingly. To avoid unnecessary damage, + * Postfix removes comments etc. only when the amount of non-address text + * in an address expression (text separated by commas) exceeds 250 bytes. + * + * With Sendmail, the address part of an address expression is the + * right-most <> instance in that expression. If an address expression + * contains no <>, then Postfix guarantees that it contains at most one + * non-comment string; that string is the address part of the address + * expression, so there is no ambiguity. + * + * Finally, we note that stress testing shows that other code in Sendmail + * 8.12.8 bluntly truncates ``text <address>'' to 256 bytes even when + * this means chopping the <address> somewhere in the middle. This is a + * loss of control that we're not entirely comfortable with. However, + * unbalanced quotes and dangling backslash do not seem to influence the + * way that Sendmail parses headers, so this is not an urgent problem. + */ +#define MAX_NONADDR_LENGTH 250 + +#define RESET_NONADDR_LENGTH { \ + start = VSTRING_LEN(vp); \ + addr = 0; \ + addr_len = 0; \ + } + +#define ENFORCE_NONADDR_LENGTH do { \ + if (addr && VSTRING_LEN(vp) - addr_len > start + MAX_NONADDR_LENGTH) \ + strip_address(vp, start, addr->head); \ + } while(0) + + if (flags & TOK822_STR_WIPE) + VSTRING_RESET(vp); + + if (flags & TOK822_STR_TRNC) + RESET_NONADDR_LENGTH; + + for (tp = tree; tp; tp = tp->next) { + switch (tp->type) { + case ',': + if (flags & TOK822_STR_TRNC) + ENFORCE_NONADDR_LENGTH; + VSTRING_ADDCH(vp, tp->type); + VSTRING_ADDCH(vp, (flags & TOK822_STR_LINE) ? '\n' : ' '); + if (flags & TOK822_STR_TRNC) + RESET_NONADDR_LENGTH; + continue; + + /* + * XXX In order to correctly externalize an address, it is not + * sufficient to quote individual atoms. There are higher-level + * rules that say when an address localpart needs to be quoted. + * We wing it with the quote_822_local() routine, which ignores + * the issue of atoms in the domain part that would need quoting. + */ + case TOK822_ADDR: + addr = tp; + tmp = vstring_alloc(100); + tok822_internalize(tmp, tp->head, TOK822_STR_TERM); + addr_len = VSTRING_LEN(vp); + quote_822_local_flags(vp, vstring_str(tmp), + QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND); + addr_len = VSTRING_LEN(vp) - addr_len; + vstring_free(tmp); + break; + case TOK822_ATOM: + case TOK822_COMMENT: + vstring_strcat(vp, vstring_str(tp->vstr)); + break; + case TOK822_QSTRING: + VSTRING_ADDCH(vp, '"'); + tok822_copy_quoted(vp, vstring_str(tp->vstr), "\"\\\r\n"); + VSTRING_ADDCH(vp, '"'); + break; + case TOK822_DOMLIT: + VSTRING_ADDCH(vp, '['); + tok822_copy_quoted(vp, vstring_str(tp->vstr), "\\\r\n"); + VSTRING_ADDCH(vp, ']'); + break; + case TOK822_STARTGRP: + VSTRING_ADDCH(vp, ':'); + break; + case '<': + if (tp->next && tp->next->type == '>') { + addr = tp; + addr_len = 0; + } + VSTRING_ADDCH(vp, '<'); + break; + default: + if (tp->type >= TOK822_MINTOK) + msg_panic("tok822_externalize: unknown operator %d", tp->type); + VSTRING_ADDCH(vp, tp->type); + } + if (tok822_append_space(tp)) + VSTRING_ADDCH(vp, ' '); + } + if (flags & TOK822_STR_TRNC) + ENFORCE_NONADDR_LENGTH; + + if (flags & TOK822_STR_TERM) + VSTRING_TERMINATE(vp); + return (vp); +} + +/* tok822_copy_quoted - copy a string while quoting */ + +static void tok822_copy_quoted(VSTRING *vp, char *str, char *quote_set) +{ + int ch; + + while ((ch = *(unsigned char *) str++) != 0) { + if (strchr(quote_set, ch)) + VSTRING_ADDCH(vp, '\\'); + VSTRING_ADDCH(vp, ch); + } +} + +/* tok822_append_space - see if space is needed after this token */ + +static int tok822_append_space(TOK822 *tp) +{ + TOK822 *next; + + if (tp == 0 || (next = tp->next) == 0 || tp->owner != 0) + return (0); + if (tp->type == ',' || tp->type == TOK822_STARTGRP || next->type == '<') + return (1); + +#define NON_OPERATOR(x) \ + (x->type == TOK822_ATOM || x->type == TOK822_QSTRING \ + || x->type == TOK822_COMMENT || x->type == TOK822_DOMLIT \ + || x->type == TOK822_ADDR) + + return (NON_OPERATOR(tp) && NON_OPERATOR(next)); +} + +/* tok822_scan_limit - tokenize string */ + +TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit) +{ + TOK822 *head = 0; + TOK822 *tail = 0; + TOK822 *tp; + int ch; + int tok_count = 0; + + /* + * XXX 2822 new feature: Section 4.1 allows "." to appear in a phrase (to + * allow for forms such as: Johnny B. Goode <johhny@domain.org>. I cannot + * handle that at the tokenizer level - it is not context sensitive. And + * to fix this at the parser level requires radical changes to preserve + * white space as part of the token stream. Thanks a lot, people. + */ + while ((ch = *(unsigned char *) str++) != 0) { + if (IS_SPACE_TAB_CR_LF(ch)) + continue; + if (ch == '(') { + tp = tok822_alloc(TOK822_COMMENT, (char *) 0); + str = tok822_comment(tp, str); + } else if (ch == '[') { + tp = tok822_alloc(TOK822_DOMLIT, (char *) 0); + COLLECT_SKIP_LAST(tp, str, ch, ch != ']'); + } else if (ch == '"') { + tp = tok822_alloc(TOK822_QSTRING, (char *) 0); + COLLECT_SKIP_LAST(tp, str, ch, ch != '"'); + } else if (ch != '\\' && strchr(tok822_opchar, ch)) { + tp = tok822_alloc(ch, (char *) 0); + } else { + tp = tok822_alloc(TOK822_ATOM, (char *) 0); + str -= 1; /* \ may be first */ + COLLECT(tp, str, ch, !IS_SPACE_TAB_CR_LF(ch) && !strchr(tok822_opchar, ch)); + tok822_quote_atom(tp); + } + if (head == 0) { + head = tail = tp; + while (tail->next) + tail = tail->next; + } else { + tail = tok822_append(tail, tp); + } + if (tok_count_limit > 0 && ++tok_count >= tok_count_limit) + break; + } + if (tailp) + *tailp = tail; + return (head); +} + +/* tok822_parse_limit - translate external string to token tree */ + +TOK822 *tok822_parse_limit(const char *str, int tok_count_limit) +{ + TOK822 *head; + TOK822 *tail; + TOK822 *right; + TOK822 *first_token; + TOK822 *last_token; + TOK822 *tp; + int state; + + /* + * First, tokenize the string, from left to right. We are not allowed to + * throw away any information that we do not understand. With a flat + * token list that contains all tokens, we can always convert back to + * string form. + */ + if ((first_token = tok822_scan_limit(str, &last_token, tok_count_limit)) == 0) + return (0); + + /* + * For convenience, sandwich the token list between two sentinel tokens. + */ +#define GLUE(left,rite) { left->next = rite; rite->prev = left; } + + head = tok822_alloc(0, (char *) 0); + GLUE(head, first_token); + tail = tok822_alloc(0, (char *) 0); + GLUE(last_token, tail); + + /* + * Next step is to transform the token list into a parse tree. This is + * done most conveniently from right to left. If there is something that + * we do not understand, just leave it alone, don't throw it away. The + * address information that we're looking for sits in-between the current + * node (tp) and the one called right. Add missing commas on the fly. + */ + state = DO_WORD; + right = tail; + tp = tail->prev; + while (tp->type) { + if (tp->type == TOK822_COMMENT) { /* move comment to the side */ + MOVE_COMMENT_AND_CONTINUE(tp, right); + } else if (tp->type == ';') { /* rh side of named group */ + right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA); + state = DO_GROUP | DO_WORD; + } else if (tp->type == ':' && (state & DO_GROUP) != 0) { + tp->type = TOK822_STARTGRP; + (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); + SKIP(tp, tp->type != ','); + right = tp; + continue; + } else if (tp->type == '>') { /* rh side of <route> */ + right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA); + SKIP_MOVE_COMMENT(tp, tp->type != '<', right); + (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); + SKIP(tp, tp->type > 0xff || strchr(">;,:", tp->type) == 0); + right = tp; + state |= DO_WORD; + continue; + } else if (tp->type == TOK822_ATOM || tp->type == TOK822_QSTRING + || tp->type == TOK822_DOMLIT) { + if ((state & DO_WORD) == 0) + right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA)->next; + state &= ~DO_WORD; + } else if (tp->type == ',') { + right = tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); + state |= DO_WORD; + } else { + state |= DO_WORD; + } + tp = tp->prev; + } + (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); + + /* + * Discard the sentinel tokens on the left and right extremes. Properly + * terminate the resulting list. + */ + tp = (head->next != tail ? head->next : 0); + tok822_cut_before(head->next); + tok822_free(head); + tok822_cut_before(tail); + tok822_free(tail); + return (tp); +} + +/* tok822_quote_atom - see if an atom needs quoting when externalized */ + +static void tok822_quote_atom(TOK822 *tp) +{ + char *cp; + int ch; + + /* + * RFC 822 expects 7-bit data. Rather than quoting every 8-bit character + * (and still passing it on as 8-bit data) we leave 8-bit data alone. + */ + for (cp = vstring_str(tp->vstr); (ch = *(unsigned char *) cp) != 0; cp++) { + if ( /* !ISASCII(ch) || */ ch == ' ' + || ISCNTRL(ch) || strchr(tok822_opchar, ch)) { + tp->type = TOK822_QSTRING; + break; + } + } +} + +/* tok822_comment - tokenize comment */ + +static const char *tok822_comment(TOK822 *tp, const char *str) +{ + int level = 1; + int ch; + + /* + * XXX We cheat by storing comments in their external form. Otherwise it + * would be a royal pain to preserve \ before (. That would require a + * recursive parser; the easy to implement stack-based recursion would be + * too expensive. + */ + VSTRING_ADDCH(tp->vstr, '('); + + while ((ch = *(unsigned char *) str) != 0) { + VSTRING_ADDCH(tp->vstr, ch); + str++; + if (ch == '(') { /* comments can nest! */ + level++; + } else if (ch == ')') { + if (--level == 0) + break; + } else if (ch == '\\') { + if ((ch = *(unsigned char *) str) == 0) + break; + VSTRING_ADDCH(tp->vstr, ch); + str++; + } + } + VSTRING_TERMINATE(tp->vstr); + return (str); +} + +/* tok822_group - cluster a group of tokens */ + +static TOK822 *tok822_group(int group_type, TOK822 *left, TOK822 *right, int sync_type) +{ + TOK822 *group; + TOK822 *sync; + TOK822 *first; + + /* + * Cluster the tokens between left and right under their own parse tree + * node. Optionally insert a resync token. + */ + if (left != right && (first = left->next) != right) { + tok822_cut_before(right); + tok822_cut_before(first); + group = tok822_alloc(group_type, (char *) 0); + tok822_sub_append(group, first); + tok822_append(left, group); + tok822_append(group, right); + if (sync_type) { + sync = tok822_alloc(sync_type, (char *) 0); + tok822_append(left, sync); + } + } + return (left); +} + +/* tok822_scan_addr - convert external address string to address token */ + +TOK822 *tok822_scan_addr(const char *addr) +{ + TOK822 *tree = tok822_alloc(TOK822_ADDR, (char *) 0); + + tree->head = tok822_scan(addr, &tree->tail); + return (tree); +} + +#ifdef TEST + +#include <unistd.h> +#include <vstream.h> +#include <readlline.h> + +/* tok822_print - display token */ + +static void tok822_print(TOK822 *list, int indent) +{ + TOK822 *tp; + + for (tp = list; tp; tp = tp->next) { + if (tp->type < TOK822_MINTOK) { + vstream_printf("%*s %s \"%c\"\n", indent, "", "OP", tp->type); + } else if (tp->type == TOK822_ADDR) { + vstream_printf("%*s %s\n", indent, "", "address"); + tok822_print(tp->head, indent + 2); + } else if (tp->type == TOK822_STARTGRP) { + vstream_printf("%*s %s\n", indent, "", "group \":\""); + } else { + vstream_printf("%*s %s \"%s\"\n", indent, "", + tp->type == TOK822_COMMENT ? "comment" : + tp->type == TOK822_ATOM ? "atom" : + tp->type == TOK822_QSTRING ? "quoted string" : + tp->type == TOK822_DOMLIT ? "domain literal" : + tp->type == TOK822_ADDR ? "address" : + "unknown\n", vstring_str(tp->vstr)); + } + } +} + +int main(int unused_argc, char **unused_argv) +{ + VSTRING *vp = vstring_alloc(100); + TOK822 *list; + VSTRING *buf = vstring_alloc(100); + +#define TEST_TOKEN_LIMIT 20 + + while (readlline(buf, VSTREAM_IN, (int *) 0)) { + while (VSTRING_LEN(buf) > 0 && vstring_end(buf)[-1] == '\n') { + vstring_end(buf)[-1] = 0; + vstring_truncate(buf, VSTRING_LEN(buf) - 1); + } + if (!isatty(vstream_fileno(VSTREAM_IN))) + vstream_printf(">>>%s<<<\n\n", vstring_str(buf)); + list = tok822_parse_limit(vstring_str(buf), TEST_TOKEN_LIMIT); + vstream_printf("Parse tree:\n"); + tok822_print(list, 0); + vstream_printf("\n"); + + vstream_printf("Internalized:\n%s\n\n", + vstring_str(tok822_internalize(vp, list, TOK822_STR_DEFL))); + vstream_fflush(VSTREAM_OUT); + vstream_printf("Externalized, no newlines inserted:\n%s\n\n", + vstring_str(tok822_externalize(vp, list, + TOK822_STR_DEFL | TOK822_STR_TRNC))); + vstream_fflush(VSTREAM_OUT); + vstream_printf("Externalized, newlines inserted:\n%s\n\n", + vstring_str(tok822_externalize(vp, list, + TOK822_STR_DEFL | TOK822_STR_LINE | TOK822_STR_TRNC))); + vstream_fflush(VSTREAM_OUT); + tok822_free_tree(list); + } + vstring_free(vp); + vstring_free(buf); + return (0); +} + +#endif |