summaryrefslogtreecommitdiffstats
path: root/src/global/tok822_parse.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 12:06:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 12:06:34 +0000
commit5e61585d76ae77fd5e9e96ebabb57afa4d74880d (patch)
tree2b467823aaeebc7ef8bc9e3cabe8074eaef1666d /src/global/tok822_parse.c
parentInitial commit. (diff)
downloadpostfix-upstream.tar.xz
postfix-upstream.zip
Adding upstream version 3.5.24.upstream/3.5.24upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/global/tok822_parse.c')
-rw-r--r--src/global/tok822_parse.c726
1 files changed, 726 insertions, 0 deletions
diff --git a/src/global/tok822_parse.c b/src/global/tok822_parse.c
new file mode 100644
index 0000000..4376802
--- /dev/null
+++ b/src/global/tok822_parse.c
@@ -0,0 +1,726 @@
+/*++
+/* NAME
+/* tok822_parse 3
+/* SUMMARY
+/* RFC 822 address parser
+/* SYNOPSIS
+/* #include <tok822.h>
+/*
+/* TOK822 *tok822_scan_limit(str, tailp, limit)
+/* const char *str;
+/* TOK822 **tailp;
+/* int limit;
+/*
+/* TOK822 *tok822_scan(str, tailp)
+/* const char *str;
+/* TOK822 **tailp;
+/*
+/* TOK822 *tok822_parse_limit(str, limit)
+/* const char *str;
+/* int limit;
+/*
+/* TOK822 *tok822_parse(str)
+/* const char *str;
+/*
+/* TOK822 *tok822_scan_addr(str)
+/* const char *str;
+/*
+/* VSTRING *tok822_externalize(buffer, tree, flags)
+/* VSTRING *buffer;
+/* TOK822 *tree;
+/* int flags;
+/*
+/* VSTRING *tok822_internalize(buffer, tree, flags)
+/* VSTRING *buffer;
+/* TOK822 *tree;
+/* int flags;
+/* DESCRIPTION
+/* This module converts address lists between string form and parse
+/* tree formats. The string form can appear in two different ways:
+/* external (or quoted) form, as used in message headers, and internal
+/* (unquoted) form, as used internally by the mail software.
+/* Although RFC 822 expects 7-bit data, these routines pay no
+/* special attention to 8-bit characters.
+/*
+/* tok822_scan() converts the external-form string in \fIstr\fR
+/* to a linear token list. The \fItailp\fR argument is a null pointer
+/* or receives the pointer value of the last result list element.
+/*
+/* tok822_scan_limit() implements tok822_scan(), which is a macro.
+/* The \fIlimit\fR argument is either zero or an upper bound on the
+/* number of tokens produced.
+/*
+/* tok822_parse() converts the external-form address list in
+/* \fIstr\fR to the corresponding token tree. The parser is permissive
+/* and will not throw away information that it does not understand.
+/* The parser adds missing commas between addresses.
+/*
+/* tok822_parse_limit() implements tok822_parse(), which is a macro.
+/* The \fIlimit\fR argument is either zero or an upper bound on the
+/* number of tokens produced.
+/*
+/* tok822_scan_addr() converts the external-form string in
+/* \fIstr\fR to an address token tree. This is just string to
+/* token list conversion; no parsing is done. This routine is
+/* suitable for data that should contain just one address and no
+/* other information.
+/*
+/* tok822_externalize() converts a token list to external form.
+/* Where appropriate, characters and strings are quoted and white
+/* space is inserted. The \fIflags\fR argument is the binary OR of
+/* zero or more of the following:
+/* .IP TOK822_STR_WIPE
+/* Initially, truncate the result to zero length.
+/* .IP TOK822_STR_TERM
+/* Append a null terminator to the result when done.
+/* .IP TOK822_STR_LINE
+/* Append a line break after each comma token, instead of appending
+/* whitespace. It is up to the caller to concatenate short lines to
+/* produce longer ones.
+/* .IP TOK822_STR_TRNC
+/* Truncate non-address information to 250 characters per address, to
+/* protect Sendmail systems that are vulnerable to the problem in CERT
+/* advisory CA-2003-07.
+/* This flag has effect with tok822_externalize() only.
+/* .PP
+/* The macro TOK_822_NONE expresses that none of the above features
+/* should be activated.
+/*
+/* The macro TOK822_STR_DEFL combines the TOK822_STR_WIPE and
+/* TOK822_STR_TERM flags. This is useful for most token to string
+/* conversions.
+/*
+/* The macro TOK822_STR_HEAD combines the TOK822_STR_TERM,
+/* TOK822_STR_LINE and TOK822_STR_TRNC flags. This is useful for
+/* the special case of token to mail header conversion.
+/*
+/* tok822_internalize() converts a token list to string form,
+/* without quoting. White space is inserted where appropriate.
+/* The \fIflags\fR argument is as with tok822_externalize().
+/* STANDARDS
+/* .ad
+/* .fi
+/* RFC 822 (ARPA Internet Text Messages). In addition to this standard
+/* this module implements additional operators such as % and !. These
+/* are needed because the real world is not all RFC 822. Also, the ':'
+/* operator is allowed to appear inside addresses, to accommodate DECnet.
+/* In addition, 8-bit data is not given special treatment.
+/* LICENSE
+/* .ad
+/* .fi
+/* The Secure Mailer license must be distributed with this software.
+/* AUTHOR(S)
+/* Wietse Venema
+/* IBM T.J. Watson Research
+/* P.O. Box 704
+/* Yorktown Heights, NY 10598, USA
+/*--*/
+
+/* System library. */
+
+#include <sys_defs.h>
+#include <ctype.h>
+#include <string.h>
+
+/* Utility library. */
+
+#include <vstring.h>
+#include <msg.h>
+#include <stringops.h>
+
+/* Global library. */
+
+#include "lex_822.h"
+#include "quote_822_local.h"
+#include "tok822.h"
+
+ /*
+ * I suppose this is my favorite macro. Used heavily for tokenizing.
+ */
+#define COLLECT(t,s,c,cond) { \
+ while ((c = *(unsigned char *) s) != 0) { \
+ if (c == '\\') { \
+ if ((c = *(unsigned char *)++s) == 0) \
+ break; \
+ } else if (!(cond)) { \
+ break; \
+ } \
+ VSTRING_ADDCH(t->vstr, IS_SPACE_TAB_CR_LF(c) ? ' ' : c); \
+ s++; \
+ } \
+ VSTRING_TERMINATE(t->vstr); \
+ }
+
+#define COLLECT_SKIP_LAST(t,s,c,cond) { COLLECT(t,s,c,cond); if (*s) s++; }
+
+ /*
+ * Not quite as complex. The parser depends heavily on it.
+ */
+#define SKIP(tp, cond) { \
+ while (tp->type && (cond)) \
+ tp = tp->prev; \
+ }
+
+#define MOVE_COMMENT_AND_CONTINUE(tp, right) { \
+ TOK822 *prev = tok822_unlink(tp); \
+ right = tok822_prepend(right, tp); \
+ tp = prev; \
+ continue; \
+ }
+
+#define SKIP_MOVE_COMMENT(tp, cond, right) { \
+ while (tp->type && (cond)) { \
+ if (tp->type == TOK822_COMMENT) \
+ MOVE_COMMENT_AND_CONTINUE(tp, right); \
+ tp = tp->prev; \
+ } \
+ }
+
+ /*
+ * Single-character operators. We include the % and ! operators because not
+ * all the world is RFC822. XXX Make this operator list configurable when we
+ * have a real rewriting language. Include | for aliases file parsing.
+ */
+static char tok822_opchar[] = "|%!" LEX_822_SPECIALS;
+static void tok822_quote_atom(TOK822 *);
+static const char *tok822_comment(TOK822 *, const char *);
+static TOK822 *tok822_group(int, TOK822 *, TOK822 *, int);
+static void tok822_copy_quoted(VSTRING *, char *, char *);
+static int tok822_append_space(TOK822 *);
+
+#define DO_WORD (1<<0) /* finding a word is ok here */
+#define DO_GROUP (1<<1) /* doing an address group */
+
+#define ADD_COMMA ',' /* resynchronize */
+#define NO_MISSING_COMMA 0
+
+/* tok822_internalize - token tree to string, internal form */
+
+VSTRING *tok822_internalize(VSTRING *vp, TOK822 *tree, int flags)
+{
+ TOK822 *tp;
+
+ if (flags & TOK822_STR_WIPE)
+ VSTRING_RESET(vp);
+
+ for (tp = tree; tp; tp = tp->next) {
+ switch (tp->type) {
+ case ',':
+ VSTRING_ADDCH(vp, tp->type);
+ if (flags & TOK822_STR_LINE) {
+ VSTRING_ADDCH(vp, '\n');
+ continue;
+ }
+ break;
+ case TOK822_ADDR:
+ tok822_internalize(vp, tp->head, TOK822_STR_NONE);
+ break;
+ case TOK822_COMMENT:
+ case TOK822_ATOM:
+ case TOK822_QSTRING:
+ vstring_strcat(vp, vstring_str(tp->vstr));
+ break;
+ case TOK822_DOMLIT:
+ VSTRING_ADDCH(vp, '[');
+ vstring_strcat(vp, vstring_str(tp->vstr));
+ VSTRING_ADDCH(vp, ']');
+ break;
+ case TOK822_STARTGRP:
+ VSTRING_ADDCH(vp, ':');
+ break;
+ default:
+ if (tp->type >= TOK822_MINTOK)
+ msg_panic("tok822_internalize: unknown operator %d", tp->type);
+ VSTRING_ADDCH(vp, tp->type);
+ }
+ if (tok822_append_space(tp))
+ VSTRING_ADDCH(vp, ' ');
+ }
+ if (flags & TOK822_STR_TERM)
+ VSTRING_TERMINATE(vp);
+ return (vp);
+}
+
+/* strip_address - strip non-address text from address expression */
+
+static void strip_address(VSTRING *vp, ssize_t start, TOK822 *addr)
+{
+ VSTRING *tmp;
+
+ /*
+ * Emit plain <address>. Discard any comments or phrases.
+ */
+ VSTRING_TERMINATE(vp);
+ msg_warn("stripping too many comments from address: %.100s...",
+ printable(vstring_str(vp) + start, '?'));
+ vstring_truncate(vp, start);
+ VSTRING_ADDCH(vp, '<');
+ if (addr) {
+ tmp = vstring_alloc(100);
+ tok822_internalize(tmp, addr, TOK822_STR_TERM);
+ quote_822_local_flags(vp, vstring_str(tmp),
+ QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND);
+ vstring_free(tmp);
+ }
+ VSTRING_ADDCH(vp, '>');
+}
+
+/* tok822_externalize - token tree to string, external form */
+
+VSTRING *tok822_externalize(VSTRING *vp, TOK822 *tree, int flags)
+{
+ VSTRING *tmp;
+ TOK822 *tp;
+ ssize_t start;
+ TOK822 *addr;
+ ssize_t addr_len;
+
+ /*
+ * Guard against a Sendmail buffer overflow (CERT advisory CA-2003-07).
+ * The problem was that Sendmail could store too much non-address text
+ * (comments, phrases, etc.) into a static 256-byte buffer.
+ *
+ * When the buffer fills up, fixed Sendmail versions remove comments etc.
+ * and reduce the information to just <$g>, which expands to <address>.
+ * No change is made when an address expression (text separated by
+ * commas) contains no address. This fix reportedly also protects
+ * Sendmail systems that are still vulnerable to this problem.
+ *
+ * Postfix takes the same approach, grudgingly. To avoid unnecessary damage,
+ * Postfix removes comments etc. only when the amount of non-address text
+ * in an address expression (text separated by commas) exceeds 250 bytes.
+ *
+ * With Sendmail, the address part of an address expression is the
+ * right-most <> instance in that expression. If an address expression
+ * contains no <>, then Postfix guarantees that it contains at most one
+ * non-comment string; that string is the address part of the address
+ * expression, so there is no ambiguity.
+ *
+ * Finally, we note that stress testing shows that other code in Sendmail
+ * 8.12.8 bluntly truncates ``text <address>'' to 256 bytes even when
+ * this means chopping the <address> somewhere in the middle. This is a
+ * loss of control that we're not entirely comfortable with. However,
+ * unbalanced quotes and dangling backslash do not seem to influence the
+ * way that Sendmail parses headers, so this is not an urgent problem.
+ */
+#define MAX_NONADDR_LENGTH 250
+
+#define RESET_NONADDR_LENGTH { \
+ start = VSTRING_LEN(vp); \
+ addr = 0; \
+ addr_len = 0; \
+ }
+
+#define ENFORCE_NONADDR_LENGTH do { \
+ if (addr && VSTRING_LEN(vp) - addr_len > start + MAX_NONADDR_LENGTH) \
+ strip_address(vp, start, addr->head); \
+ } while(0)
+
+ if (flags & TOK822_STR_WIPE)
+ VSTRING_RESET(vp);
+
+ if (flags & TOK822_STR_TRNC)
+ RESET_NONADDR_LENGTH;
+
+ for (tp = tree; tp; tp = tp->next) {
+ switch (tp->type) {
+ case ',':
+ if (flags & TOK822_STR_TRNC)
+ ENFORCE_NONADDR_LENGTH;
+ VSTRING_ADDCH(vp, tp->type);
+ VSTRING_ADDCH(vp, (flags & TOK822_STR_LINE) ? '\n' : ' ');
+ if (flags & TOK822_STR_TRNC)
+ RESET_NONADDR_LENGTH;
+ continue;
+
+ /*
+ * XXX In order to correctly externalize an address, it is not
+ * sufficient to quote individual atoms. There are higher-level
+ * rules that say when an address localpart needs to be quoted.
+ * We wing it with the quote_822_local() routine, which ignores
+ * the issue of atoms in the domain part that would need quoting.
+ */
+ case TOK822_ADDR:
+ addr = tp;
+ tmp = vstring_alloc(100);
+ tok822_internalize(tmp, tp->head, TOK822_STR_TERM);
+ addr_len = VSTRING_LEN(vp);
+ quote_822_local_flags(vp, vstring_str(tmp),
+ QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND);
+ addr_len = VSTRING_LEN(vp) - addr_len;
+ vstring_free(tmp);
+ break;
+ case TOK822_ATOM:
+ case TOK822_COMMENT:
+ vstring_strcat(vp, vstring_str(tp->vstr));
+ break;
+ case TOK822_QSTRING:
+ VSTRING_ADDCH(vp, '"');
+ tok822_copy_quoted(vp, vstring_str(tp->vstr), "\"\\\r\n");
+ VSTRING_ADDCH(vp, '"');
+ break;
+ case TOK822_DOMLIT:
+ VSTRING_ADDCH(vp, '[');
+ tok822_copy_quoted(vp, vstring_str(tp->vstr), "\\\r\n");
+ VSTRING_ADDCH(vp, ']');
+ break;
+ case TOK822_STARTGRP:
+ VSTRING_ADDCH(vp, ':');
+ break;
+ case '<':
+ if (tp->next && tp->next->type == '>') {
+ addr = tp;
+ addr_len = 0;
+ }
+ VSTRING_ADDCH(vp, '<');
+ break;
+ default:
+ if (tp->type >= TOK822_MINTOK)
+ msg_panic("tok822_externalize: unknown operator %d", tp->type);
+ VSTRING_ADDCH(vp, tp->type);
+ }
+ if (tok822_append_space(tp))
+ VSTRING_ADDCH(vp, ' ');
+ }
+ if (flags & TOK822_STR_TRNC)
+ ENFORCE_NONADDR_LENGTH;
+
+ if (flags & TOK822_STR_TERM)
+ VSTRING_TERMINATE(vp);
+ return (vp);
+}
+
+/* tok822_copy_quoted - copy a string while quoting */
+
+static void tok822_copy_quoted(VSTRING *vp, char *str, char *quote_set)
+{
+ int ch;
+
+ while ((ch = *(unsigned char *) str++) != 0) {
+ if (strchr(quote_set, ch))
+ VSTRING_ADDCH(vp, '\\');
+ VSTRING_ADDCH(vp, ch);
+ }
+}
+
+/* tok822_append_space - see if space is needed after this token */
+
+static int tok822_append_space(TOK822 *tp)
+{
+ TOK822 *next;
+
+ if (tp == 0 || (next = tp->next) == 0 || tp->owner != 0)
+ return (0);
+ if (tp->type == ',' || tp->type == TOK822_STARTGRP || next->type == '<')
+ return (1);
+
+#define NON_OPERATOR(x) \
+ (x->type == TOK822_ATOM || x->type == TOK822_QSTRING \
+ || x->type == TOK822_COMMENT || x->type == TOK822_DOMLIT \
+ || x->type == TOK822_ADDR)
+
+ return (NON_OPERATOR(tp) && NON_OPERATOR(next));
+}
+
+/* tok822_scan_limit - tokenize string */
+
+TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit)
+{
+ TOK822 *head = 0;
+ TOK822 *tail = 0;
+ TOK822 *tp;
+ int ch;
+ int tok_count = 0;
+
+ /*
+ * XXX 2822 new feature: Section 4.1 allows "." to appear in a phrase (to
+ * allow for forms such as: Johnny B. Goode <johhny@domain.org>. I cannot
+ * handle that at the tokenizer level - it is not context sensitive. And
+ * to fix this at the parser level requires radical changes to preserve
+ * white space as part of the token stream. Thanks a lot, people.
+ */
+ while ((ch = *(unsigned char *) str++) != 0) {
+ if (IS_SPACE_TAB_CR_LF(ch))
+ continue;
+ if (ch == '(') {
+ tp = tok822_alloc(TOK822_COMMENT, (char *) 0);
+ str = tok822_comment(tp, str);
+ } else if (ch == '[') {
+ tp = tok822_alloc(TOK822_DOMLIT, (char *) 0);
+ COLLECT_SKIP_LAST(tp, str, ch, ch != ']');
+ } else if (ch == '"') {
+ tp = tok822_alloc(TOK822_QSTRING, (char *) 0);
+ COLLECT_SKIP_LAST(tp, str, ch, ch != '"');
+ } else if (ch != '\\' && strchr(tok822_opchar, ch)) {
+ tp = tok822_alloc(ch, (char *) 0);
+ } else {
+ tp = tok822_alloc(TOK822_ATOM, (char *) 0);
+ str -= 1; /* \ may be first */
+ COLLECT(tp, str, ch, !IS_SPACE_TAB_CR_LF(ch) && !strchr(tok822_opchar, ch));
+ tok822_quote_atom(tp);
+ }
+ if (head == 0) {
+ head = tail = tp;
+ while (tail->next)
+ tail = tail->next;
+ } else {
+ tail = tok822_append(tail, tp);
+ }
+ if (tok_count_limit > 0 && ++tok_count >= tok_count_limit)
+ break;
+ }
+ if (tailp)
+ *tailp = tail;
+ return (head);
+}
+
+/* tok822_parse_limit - translate external string to token tree */
+
+TOK822 *tok822_parse_limit(const char *str, int tok_count_limit)
+{
+ TOK822 *head;
+ TOK822 *tail;
+ TOK822 *right;
+ TOK822 *first_token;
+ TOK822 *last_token;
+ TOK822 *tp;
+ int state;
+
+ /*
+ * First, tokenize the string, from left to right. We are not allowed to
+ * throw away any information that we do not understand. With a flat
+ * token list that contains all tokens, we can always convert back to
+ * string form.
+ */
+ if ((first_token = tok822_scan_limit(str, &last_token, tok_count_limit)) == 0)
+ return (0);
+
+ /*
+ * For convenience, sandwich the token list between two sentinel tokens.
+ */
+#define GLUE(left,rite) { left->next = rite; rite->prev = left; }
+
+ head = tok822_alloc(0, (char *) 0);
+ GLUE(head, first_token);
+ tail = tok822_alloc(0, (char *) 0);
+ GLUE(last_token, tail);
+
+ /*
+ * Next step is to transform the token list into a parse tree. This is
+ * done most conveniently from right to left. If there is something that
+ * we do not understand, just leave it alone, don't throw it away. The
+ * address information that we're looking for sits in-between the current
+ * node (tp) and the one called right. Add missing commas on the fly.
+ */
+ state = DO_WORD;
+ right = tail;
+ tp = tail->prev;
+ while (tp->type) {
+ if (tp->type == TOK822_COMMENT) { /* move comment to the side */
+ MOVE_COMMENT_AND_CONTINUE(tp, right);
+ } else if (tp->type == ';') { /* rh side of named group */
+ right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA);
+ state = DO_GROUP | DO_WORD;
+ } else if (tp->type == ':' && (state & DO_GROUP) != 0) {
+ tp->type = TOK822_STARTGRP;
+ (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
+ SKIP(tp, tp->type != ',');
+ right = tp;
+ continue;
+ } else if (tp->type == '>') { /* rh side of <route> */
+ right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA);
+ SKIP_MOVE_COMMENT(tp, tp->type != '<', right);
+ (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
+ SKIP(tp, tp->type > 0xff || strchr(">;,:", tp->type) == 0);
+ right = tp;
+ state |= DO_WORD;
+ continue;
+ } else if (tp->type == TOK822_ATOM || tp->type == TOK822_QSTRING
+ || tp->type == TOK822_DOMLIT) {
+ if ((state & DO_WORD) == 0)
+ right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA)->next;
+ state &= ~DO_WORD;
+ } else if (tp->type == ',') {
+ right = tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
+ state |= DO_WORD;
+ } else {
+ state |= DO_WORD;
+ }
+ tp = tp->prev;
+ }
+ (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA);
+
+ /*
+ * Discard the sentinel tokens on the left and right extremes. Properly
+ * terminate the resulting list.
+ */
+ tp = (head->next != tail ? head->next : 0);
+ tok822_cut_before(head->next);
+ tok822_free(head);
+ tok822_cut_before(tail);
+ tok822_free(tail);
+ return (tp);
+}
+
+/* tok822_quote_atom - see if an atom needs quoting when externalized */
+
+static void tok822_quote_atom(TOK822 *tp)
+{
+ char *cp;
+ int ch;
+
+ /*
+ * RFC 822 expects 7-bit data. Rather than quoting every 8-bit character
+ * (and still passing it on as 8-bit data) we leave 8-bit data alone.
+ */
+ for (cp = vstring_str(tp->vstr); (ch = *(unsigned char *) cp) != 0; cp++) {
+ if ( /* !ISASCII(ch) || */ ch == ' '
+ || ISCNTRL(ch) || strchr(tok822_opchar, ch)) {
+ tp->type = TOK822_QSTRING;
+ break;
+ }
+ }
+}
+
+/* tok822_comment - tokenize comment */
+
+static const char *tok822_comment(TOK822 *tp, const char *str)
+{
+ int level = 1;
+ int ch;
+
+ /*
+ * XXX We cheat by storing comments in their external form. Otherwise it
+ * would be a royal pain to preserve \ before (. That would require a
+ * recursive parser; the easy to implement stack-based recursion would be
+ * too expensive.
+ */
+ VSTRING_ADDCH(tp->vstr, '(');
+
+ while ((ch = *(unsigned char *) str) != 0) {
+ VSTRING_ADDCH(tp->vstr, ch);
+ str++;
+ if (ch == '(') { /* comments can nest! */
+ level++;
+ } else if (ch == ')') {
+ if (--level == 0)
+ break;
+ } else if (ch == '\\') {
+ if ((ch = *(unsigned char *) str) == 0)
+ break;
+ VSTRING_ADDCH(tp->vstr, ch);
+ str++;
+ }
+ }
+ VSTRING_TERMINATE(tp->vstr);
+ return (str);
+}
+
+/* tok822_group - cluster a group of tokens */
+
+static TOK822 *tok822_group(int group_type, TOK822 *left, TOK822 *right, int sync_type)
+{
+ TOK822 *group;
+ TOK822 *sync;
+ TOK822 *first;
+
+ /*
+ * Cluster the tokens between left and right under their own parse tree
+ * node. Optionally insert a resync token.
+ */
+ if (left != right && (first = left->next) != right) {
+ tok822_cut_before(right);
+ tok822_cut_before(first);
+ group = tok822_alloc(group_type, (char *) 0);
+ tok822_sub_append(group, first);
+ tok822_append(left, group);
+ tok822_append(group, right);
+ if (sync_type) {
+ sync = tok822_alloc(sync_type, (char *) 0);
+ tok822_append(left, sync);
+ }
+ }
+ return (left);
+}
+
+/* tok822_scan_addr - convert external address string to address token */
+
+TOK822 *tok822_scan_addr(const char *addr)
+{
+ TOK822 *tree = tok822_alloc(TOK822_ADDR, (char *) 0);
+
+ tree->head = tok822_scan(addr, &tree->tail);
+ return (tree);
+}
+
+#ifdef TEST
+
+#include <unistd.h>
+#include <vstream.h>
+#include <readlline.h>
+
+/* tok822_print - display token */
+
+static void tok822_print(TOK822 *list, int indent)
+{
+ TOK822 *tp;
+
+ for (tp = list; tp; tp = tp->next) {
+ if (tp->type < TOK822_MINTOK) {
+ vstream_printf("%*s %s \"%c\"\n", indent, "", "OP", tp->type);
+ } else if (tp->type == TOK822_ADDR) {
+ vstream_printf("%*s %s\n", indent, "", "address");
+ tok822_print(tp->head, indent + 2);
+ } else if (tp->type == TOK822_STARTGRP) {
+ vstream_printf("%*s %s\n", indent, "", "group \":\"");
+ } else {
+ vstream_printf("%*s %s \"%s\"\n", indent, "",
+ tp->type == TOK822_COMMENT ? "comment" :
+ tp->type == TOK822_ATOM ? "atom" :
+ tp->type == TOK822_QSTRING ? "quoted string" :
+ tp->type == TOK822_DOMLIT ? "domain literal" :
+ tp->type == TOK822_ADDR ? "address" :
+ "unknown\n", vstring_str(tp->vstr));
+ }
+ }
+}
+
+int main(int unused_argc, char **unused_argv)
+{
+ VSTRING *vp = vstring_alloc(100);
+ TOK822 *list;
+ VSTRING *buf = vstring_alloc(100);
+
+#define TEST_TOKEN_LIMIT 20
+
+ while (readlline(buf, VSTREAM_IN, (int *) 0)) {
+ while (VSTRING_LEN(buf) > 0 && vstring_end(buf)[-1] == '\n') {
+ vstring_end(buf)[-1] = 0;
+ vstring_truncate(buf, VSTRING_LEN(buf) - 1);
+ }
+ if (!isatty(vstream_fileno(VSTREAM_IN)))
+ vstream_printf(">>>%s<<<\n\n", vstring_str(buf));
+ list = tok822_parse_limit(vstring_str(buf), TEST_TOKEN_LIMIT);
+ vstream_printf("Parse tree:\n");
+ tok822_print(list, 0);
+ vstream_printf("\n");
+
+ vstream_printf("Internalized:\n%s\n\n",
+ vstring_str(tok822_internalize(vp, list, TOK822_STR_DEFL)));
+ vstream_fflush(VSTREAM_OUT);
+ vstream_printf("Externalized, no newlines inserted:\n%s\n\n",
+ vstring_str(tok822_externalize(vp, list,
+ TOK822_STR_DEFL | TOK822_STR_TRNC)));
+ vstream_fflush(VSTREAM_OUT);
+ vstream_printf("Externalized, newlines inserted:\n%s\n\n",
+ vstring_str(tok822_externalize(vp, list,
+ TOK822_STR_DEFL | TOK822_STR_LINE | TOK822_STR_TRNC)));
+ vstream_fflush(VSTREAM_OUT);
+ tok822_free_tree(list);
+ }
+ vstring_free(vp);
+ vstring_free(buf);
+ return (0);
+}
+
+#endif