summaryrefslogtreecommitdiffstats
path: root/libc-top-half/musl/src/regex
diff options
context:
space:
mode:
Diffstat (limited to 'libc-top-half/musl/src/regex')
-rw-r--r--libc-top-half/musl/src/regex/fnmatch.c321
-rw-r--r--libc-top-half/musl/src/regex/glob.c317
-rw-r--r--libc-top-half/musl/src/regex/regcomp.c2953
-rw-r--r--libc-top-half/musl/src/regex/regerror.c37
-rw-r--r--libc-top-half/musl/src/regex/regexec.c1028
-rw-r--r--libc-top-half/musl/src/regex/tre-mem.c158
-rw-r--r--libc-top-half/musl/src/regex/tre.h233
7 files changed, 5047 insertions, 0 deletions
diff --git a/libc-top-half/musl/src/regex/fnmatch.c b/libc-top-half/musl/src/regex/fnmatch.c
new file mode 100644
index 0000000..978fff8
--- /dev/null
+++ b/libc-top-half/musl/src/regex/fnmatch.c
@@ -0,0 +1,321 @@
+/*
+ * An implementation of what I call the "Sea of Stars" algorithm for
+ * POSIX fnmatch(). The basic idea is that we factor the pattern into
+ * a head component (which we match first and can reject without ever
+ * measuring the length of the string), an optional tail component
+ * (which only exists if the pattern contains at least one star), and
+ * an optional "sea of stars", a set of star-separated components
+ * between the head and tail. After the head and tail matches have
+ * been removed from the input string, the components in the "sea of
+ * stars" are matched sequentially by searching for their first
+ * occurrence past the end of the previous match.
+ *
+ * - Rich Felker, April 2012
+ */
+
+#include <string.h>
+#include <fnmatch.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+#include "locale_impl.h"
+
+#define END 0
+#define UNMATCHABLE -2
+#define BRACKET -3
+#define QUESTION -4
+#define STAR -5
+
+static int str_next(const char *str, size_t n, size_t *step)
+{
+ if (!n) {
+ *step = 0;
+ return 0;
+ }
+ if (str[0] >= 128U) {
+ wchar_t wc;
+ int k = mbtowc(&wc, str, n);
+ if (k<0) {
+ *step = 1;
+ return -1;
+ }
+ *step = k;
+ return wc;
+ }
+ *step = 1;
+ return str[0];
+}
+
+static int pat_next(const char *pat, size_t m, size_t *step, int flags)
+{
+ int esc = 0;
+ if (!m || !*pat) {
+ *step = 0;
+ return END;
+ }
+ *step = 1;
+ if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
+ *step = 2;
+ pat++;
+ esc = 1;
+ goto escaped;
+ }
+ if (pat[0]=='[') {
+ size_t k = 1;
+ if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
+ if (k<m) if (pat[k] == ']') k++;
+ for (; k<m && pat[k] && pat[k]!=']'; k++) {
+ if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
+ int z = pat[k+1];
+ k+=2;
+ if (k<m && pat[k]) k++;
+ while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
+ if (k==m || !pat[k]) break;
+ }
+ }
+ if (k==m || !pat[k]) {
+ *step = 1;
+ return '[';
+ }
+ *step = k+1;
+ return BRACKET;
+ }
+ if (pat[0] == '*')
+ return STAR;
+ if (pat[0] == '?')
+ return QUESTION;
+escaped:
+ if (pat[0] >= 128U) {
+ wchar_t wc;
+ int k = mbtowc(&wc, pat, m);
+ if (k<0) {
+ *step = 0;
+ return UNMATCHABLE;
+ }
+ *step = k + esc;
+ return wc;
+ }
+ return pat[0];
+}
+
+static int casefold(int k)
+{
+ int c = towupper(k);
+ return c == k ? towlower(k) : c;
+}
+
+static int match_bracket(const char *p, int k, int kfold)
+{
+ wchar_t wc;
+ int inv = 0;
+ p++;
+ if (*p=='^' || *p=='!') {
+ inv = 1;
+ p++;
+ }
+ if (*p==']') {
+ if (k==']') return !inv;
+ p++;
+ } else if (*p=='-') {
+ if (k=='-') return !inv;
+ p++;
+ }
+ wc = p[-1];
+ for (; *p != ']'; p++) {
+ if (p[0]=='-' && p[1]!=']') {
+ wchar_t wc2;
+ int l = mbtowc(&wc2, p+1, 4);
+ if (l < 0) return 0;
+ if (wc <= wc2)
+ if ((unsigned)k-wc <= wc2-wc ||
+ (unsigned)kfold-wc <= wc2-wc)
+ return !inv;
+ p += l-1;
+ continue;
+ }
+ if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
+ const char *p0 = p+2;
+ int z = p[1];
+ p+=3;
+ while (p[-1]!=z || p[0]!=']') p++;
+ if (z == ':' && p-1-p0 < 16) {
+ char buf[16];
+ memcpy(buf, p0, p-1-p0);
+ buf[p-1-p0] = 0;
+ if (iswctype(k, wctype(buf)) ||
+ iswctype(kfold, wctype(buf)))
+ return !inv;
+ }
+ continue;
+ }
+ if (*p < 128U) {
+ wc = (unsigned char)*p;
+ } else {
+ int l = mbtowc(&wc, p, 4);
+ if (l < 0) return 0;
+ p += l-1;
+ }
+ if (wc==k || wc==kfold) return !inv;
+ }
+ return inv;
+}
+
+static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
+{
+ const char *p, *ptail, *endpat;
+ const char *s, *stail, *endstr;
+ size_t pinc, sinc, tailcnt=0;
+ int c, k, kfold;
+
+ if (flags & FNM_PERIOD) {
+ if (*str == '.' && *pat != '.')
+ return FNM_NOMATCH;
+ }
+ for (;;) {
+ switch ((c = pat_next(pat, m, &pinc, flags))) {
+ case UNMATCHABLE:
+ return FNM_NOMATCH;
+ case STAR:
+ pat++;
+ m--;
+ break;
+ default:
+ k = str_next(str, n, &sinc);
+ if (k <= 0)
+ return (c==END) ? 0 : FNM_NOMATCH;
+ str += sinc;
+ n -= sinc;
+ kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
+ if (c == BRACKET) {
+ if (!match_bracket(pat, k, kfold))
+ return FNM_NOMATCH;
+ } else if (c != QUESTION && k != c && kfold != c) {
+ return FNM_NOMATCH;
+ }
+ pat+=pinc;
+ m-=pinc;
+ continue;
+ }
+ break;
+ }
+
+ /* Compute real pat length if it was initially unknown/-1 */
+ m = strnlen(pat, m);
+ endpat = pat + m;
+
+ /* Find the last * in pat and count chars needed after it */
+ for (p=ptail=pat; p<endpat; p+=pinc) {
+ switch (pat_next(p, endpat-p, &pinc, flags)) {
+ case UNMATCHABLE:
+ return FNM_NOMATCH;
+ case STAR:
+ tailcnt=0;
+ ptail = p+1;
+ break;
+ default:
+ tailcnt++;
+ break;
+ }
+ }
+
+ /* Past this point we need not check for UNMATCHABLE in pat,
+ * because all of pat has already been parsed once. */
+
+ /* Compute real str length if it was initially unknown/-1 */
+ n = strnlen(str, n);
+ endstr = str + n;
+ if (n < tailcnt) return FNM_NOMATCH;
+
+ /* Find the final tailcnt chars of str, accounting for UTF-8.
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return FNM_NOMATCH;
+ stail = s;
+
+ /* Check that the pat and str tails match */
+ p = ptail;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc, flags);
+ p += pinc;
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
+ if (c != END) return FNM_NOMATCH;
+ break;
+ }
+ s += sinc;
+ kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
+ if (c == BRACKET) {
+ if (!match_bracket(p-pinc, k, kfold))
+ return FNM_NOMATCH;
+ } else if (c != QUESTION && k != c && kfold != c) {
+ return FNM_NOMATCH;
+ }
+ }
+
+ /* We're all done with the tails now, so throw them out */
+ endstr = stail;
+ endpat = ptail;
+
+ /* Match pattern components until there are none left */
+ while (pat<endpat) {
+ p = pat;
+ s = str;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc, flags);
+ p += pinc;
+ /* Encountering * completes/commits a component */
+ if (c == STAR) {
+ pat = p;
+ str = s;
+ break;
+ }
+ k = str_next(s, endstr-s, &sinc);
+ if (!k)
+ return FNM_NOMATCH;
+ kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
+ if (c == BRACKET) {
+ if (!match_bracket(p-pinc, k, kfold))
+ break;
+ } else if (c != QUESTION && k != c && kfold != c) {
+ break;
+ }
+ s += sinc;
+ }
+ if (c == STAR) continue;
+ /* If we failed, advance str, by 1 char if it's a valid
+ * char, or past all invalid bytes otherwise. */
+ k = str_next(str, endstr-str, &sinc);
+ if (k > 0) str += sinc;
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
+ }
+
+ return 0;
+}
+
+int fnmatch(const char *pat, const char *str, int flags)
+{
+ const char *s, *p;
+ size_t inc;
+ int c;
+ if (flags & FNM_PATHNAME) for (;;) {
+ for (s=str; *s && *s!='/'; s++);
+ for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
+ if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
+ return FNM_NOMATCH;
+ if (fnmatch_internal(pat, p-pat, str, s-str, flags))
+ return FNM_NOMATCH;
+ if (!c) return 0;
+ str = s+1;
+ pat = p+inc;
+ } else if (flags & FNM_LEADING_DIR) {
+ for (s=str; *s; s++) {
+ if (*s != '/') continue;
+ if (!fnmatch_internal(pat, -1, str, s-str, flags))
+ return 0;
+ }
+ }
+ return fnmatch_internal(pat, -1, str, -1, flags);
+}
diff --git a/libc-top-half/musl/src/regex/glob.c b/libc-top-half/musl/src/regex/glob.c
new file mode 100644
index 0000000..78063a4
--- /dev/null
+++ b/libc-top-half/musl/src/regex/glob.c
@@ -0,0 +1,317 @@
+#define _BSD_SOURCE
+#include <glob.h>
+#include <fnmatch.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stddef.h>
+#include <unistd.h>
+#ifdef __wasilibc_unmodified_upstream // WASI has no usernames
+#include <pwd.h>
+#endif
+
+struct match
+{
+ struct match *next;
+ char name[];
+};
+
+static int append(struct match **tail, const char *name, size_t len, int mark)
+{
+ struct match *new = malloc(sizeof(struct match) + len + 2);
+ if (!new) return -1;
+ (*tail)->next = new;
+ new->next = NULL;
+ memcpy(new->name, name, len+1);
+ if (mark && len && name[len-1]!='/') {
+ new->name[len] = '/';
+ new->name[len+1] = 0;
+ }
+ *tail = new;
+ return 0;
+}
+
+static int do_glob(char *buf, size_t pos, int type, char *pat, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
+{
+ /* If GLOB_MARK is unused, we don't care about type. */
+ if (!type && !(flags & GLOB_MARK)) type = DT_REG;
+
+ /* Special-case the remaining pattern being all slashes, in
+ * which case we can use caller-passed type if it's a dir. */
+ if (*pat && type!=DT_DIR) type = 0;
+ while (pos+1 < PATH_MAX && *pat=='/') buf[pos++] = *pat++;
+
+ /* Consume maximal [escaped-]literal prefix of pattern, copying
+ * and un-escaping it to the running buffer as we go. */
+ ptrdiff_t i=0, j=0;
+ int in_bracket = 0, overflow = 0;
+ for (; pat[i]!='*' && pat[i]!='?' && (!in_bracket || pat[i]!=']'); i++) {
+ if (!pat[i]) {
+ if (overflow) return 0;
+ pat += i;
+ pos += j;
+ i = j = 0;
+ break;
+ } else if (pat[i] == '[') {
+ in_bracket = 1;
+ } else if (pat[i] == '\\' && !(flags & GLOB_NOESCAPE)) {
+ /* Backslashes inside a bracket are (at least by
+ * our interpretation) non-special, so if next
+ * char is ']' we have a complete expression. */
+ if (in_bracket && pat[i+1]==']') break;
+ /* Unpaired final backslash never matches. */
+ if (!pat[i+1]) return 0;
+ i++;
+ }
+ if (pat[i] == '/') {
+ if (overflow) return 0;
+ in_bracket = 0;
+ pat += i+1;
+ i = -1;
+ pos += j+1;
+ j = -1;
+ }
+ /* Only store a character if it fits in the buffer, but if
+ * a potential bracket expression is open, the overflow
+ * must be remembered and handled later only if the bracket
+ * is unterminated (and thereby a literal), so as not to
+ * disallow long bracket expressions with short matches. */
+ if (pos+(j+1) < PATH_MAX) {
+ buf[pos+j++] = pat[i];
+ } else if (in_bracket) {
+ overflow = 1;
+ } else {
+ return 0;
+ }
+ /* If we consume any new components, the caller-passed type
+ * or dummy type from above is no longer valid. */
+ type = 0;
+ }
+ buf[pos] = 0;
+ if (!*pat) {
+ /* If we consumed any components above, or if GLOB_MARK is
+ * requested and we don't yet know if the match is a dir,
+ * we must confirm the file exists and/or determine its type.
+ *
+ * If marking dirs, symlink type is inconclusive; we need the
+ * type for the symlink target, and therefore must try stat
+ * first unless type is known not to be a symlink. Otherwise,
+ * or if that fails, use lstat for determining existence to
+ * avoid false negatives in the case of broken symlinks. */
+ struct stat st;
+ if ((flags & GLOB_MARK) && (!type||type==DT_LNK) && !stat(buf, &st)) {
+ if (S_ISDIR(st.st_mode)) type = DT_DIR;
+ else type = DT_REG;
+ }
+ if (!type && lstat(buf, &st)) {
+ if (errno!=ENOENT && (errfunc(buf, errno) || (flags & GLOB_ERR)))
+ return GLOB_ABORTED;
+ return 0;
+ }
+ if (append(tail, buf, pos, (flags & GLOB_MARK) && type==DT_DIR))
+ return GLOB_NOSPACE;
+ return 0;
+ }
+ char *p2 = strchr(pat, '/'), saved_sep = '/';
+ /* Check if the '/' was escaped and, if so, remove the escape char
+ * so that it will not be unpaired when passed to fnmatch. */
+ if (p2 && !(flags & GLOB_NOESCAPE)) {
+ char *p;
+ for (p=p2; p>pat && p[-1]=='\\'; p--);
+ if ((p2-p)%2) {
+ p2--;
+ saved_sep = '\\';
+ }
+ }
+ DIR *dir = opendir(pos ? buf : ".");
+ if (!dir) {
+ if (errfunc(buf, errno) || (flags & GLOB_ERR))
+ return GLOB_ABORTED;
+ return 0;
+ }
+ int old_errno = errno;
+ struct dirent *de;
+ while (errno=0, de=readdir(dir)) {
+ /* Quickly skip non-directories when there's pattern left. */
+ if (p2 && de->d_type && de->d_type!=DT_DIR && de->d_type!=DT_LNK)
+ continue;
+
+ size_t l = strlen(de->d_name);
+ if (l >= PATH_MAX-pos) continue;
+
+ if (p2) *p2 = 0;
+
+ int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
+ | ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
+
+ if (fnmatch(pat, de->d_name, fnm_flags))
+ continue;
+
+ /* With GLOB_PERIOD, don't allow matching . or .. unless
+ * fnmatch would match them with FNM_PERIOD rules in effect. */
+ if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
+ && (!de->d_name[1] || de->d_name[1]=='.' && !de->d_name[2])
+ && fnmatch(pat, de->d_name, fnm_flags | FNM_PERIOD))
+ continue;
+
+ memcpy(buf+pos, de->d_name, l+1);
+ if (p2) *p2 = saved_sep;
+ int r = do_glob(buf, pos+l, de->d_type, p2 ? p2 : "", flags, errfunc, tail);
+ if (r) {
+ closedir(dir);
+ return r;
+ }
+ }
+ int readerr = errno;
+ if (p2) *p2 = saved_sep;
+ closedir(dir);
+ if (readerr && (errfunc(buf, errno) || (flags & GLOB_ERR)))
+ return GLOB_ABORTED;
+ errno = old_errno;
+ return 0;
+}
+
+static int ignore_err(const char *path, int err)
+{
+ return 0;
+}
+
+static void freelist(struct match *head)
+{
+ struct match *match, *next;
+ for (match=head->next; match; match=next) {
+ next = match->next;
+ free(match);
+ }
+}
+
+static int sort(const void *a, const void *b)
+{
+ return strcmp(*(const char **)a, *(const char **)b);
+}
+
+#ifdef __wasilibc_unmodified_upstream // WASI has no usernames
+static int expand_tilde(char **pat, char *buf, size_t *pos)
+{
+ char *p = *pat + 1;
+ size_t i = 0;
+
+ char delim, *name_end = __strchrnul(p, '/');
+ if ((delim = *name_end)) *name_end++ = 0;
+ *pat = name_end;
+
+ char *home = *p ? NULL : getenv("HOME");
+ if (!home) {
+ struct passwd pw, *res;
+ switch (*p ? getpwnam_r(p, &pw, buf, PATH_MAX, &res)
+ : getpwuid_r(getuid(), &pw, buf, PATH_MAX, &res)) {
+ case ENOMEM:
+ return GLOB_NOSPACE;
+ case 0:
+ if (!res)
+ default:
+ return GLOB_NOMATCH;
+ }
+ home = pw.pw_dir;
+ }
+ while (i < PATH_MAX - 2 && *home)
+ buf[i++] = *home++;
+ if (*home)
+ return GLOB_NOMATCH;
+ if ((buf[i] = delim))
+ buf[++i] = 0;
+ *pos = i;
+ return 0;
+}
+#endif
+
+int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
+{
+ struct match head = { .next = NULL }, *tail = &head;
+ size_t cnt, i;
+ size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0;
+ int error = 0;
+ char buf[PATH_MAX];
+
+ if (!errfunc) errfunc = ignore_err;
+
+ if (!(flags & GLOB_APPEND)) {
+ g->gl_offs = offs;
+ g->gl_pathc = 0;
+ g->gl_pathv = NULL;
+ }
+
+ if (*pat) {
+ char *p = strdup(pat);
+ if (!p) return GLOB_NOSPACE;
+ buf[0] = 0;
+ size_t pos = 0;
+ char *s = p;
+#ifdef __wasilibc_unmodified_upstream // WASI has no usernames
+ if ((flags & (GLOB_TILDE | GLOB_TILDE_CHECK)) && *p == '~')
+ error = expand_tilde(&s, buf, &pos);
+#endif
+ if (!error)
+ error = do_glob(buf, pos, 0, s, flags, errfunc, &tail);
+ free(p);
+ }
+
+ if (error == GLOB_NOSPACE) {
+ freelist(&head);
+ return error;
+ }
+
+ for (cnt=0, tail=head.next; tail; tail=tail->next, cnt++);
+ if (!cnt) {
+ if (flags & GLOB_NOCHECK) {
+ tail = &head;
+ if (append(&tail, pat, strlen(pat), 0))
+ return GLOB_NOSPACE;
+ cnt++;
+ } else
+ return GLOB_NOMATCH;
+ }
+
+ if (flags & GLOB_APPEND) {
+ char **pathv = realloc(g->gl_pathv, (offs + g->gl_pathc + cnt + 1) * sizeof(char *));
+ if (!pathv) {
+ freelist(&head);
+ return GLOB_NOSPACE;
+ }
+ g->gl_pathv = pathv;
+ offs += g->gl_pathc;
+ } else {
+ g->gl_pathv = malloc((offs + cnt + 1) * sizeof(char *));
+ if (!g->gl_pathv) {
+ freelist(&head);
+ return GLOB_NOSPACE;
+ }
+ for (i=0; i<offs; i++)
+ g->gl_pathv[i] = NULL;
+ }
+ for (i=0, tail=head.next; i<cnt; tail=tail->next, i++)
+ g->gl_pathv[offs + i] = tail->name;
+ g->gl_pathv[offs + i] = NULL;
+ g->gl_pathc += cnt;
+
+ if (!(flags & GLOB_NOSORT))
+ qsort(g->gl_pathv+offs, cnt, sizeof(char *), sort);
+
+ return error;
+}
+
+void globfree(glob_t *g)
+{
+ size_t i;
+ for (i=0; i<g->gl_pathc; i++)
+ free(g->gl_pathv[g->gl_offs + i] - offsetof(struct match, name));
+ free(g->gl_pathv);
+ g->gl_pathc = 0;
+ g->gl_pathv = NULL;
+}
+
+weak_alias(glob, glob64);
+weak_alias(globfree, globfree64);
diff --git a/libc-top-half/musl/src/regex/regcomp.c b/libc-top-half/musl/src/regex/regcomp.c
new file mode 100644
index 0000000..fb24556
--- /dev/null
+++ b/libc-top-half/musl/src/regex/regcomp.c
@@ -0,0 +1,2953 @@
+/*
+ regcomp.c - TRE POSIX compatible regex compilation functions.
+
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <string.h>
+#include <stdlib.h>
+#include <regex.h>
+#include <limits.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include "tre.h"
+
+#include <assert.h>
+
+/***********************************************************************
+ from tre-compile.h
+***********************************************************************/
+
+typedef struct {
+ int position;
+ int code_min;
+ int code_max;
+ int *tags;
+ int assertions;
+ tre_ctype_t class;
+ tre_ctype_t *neg_classes;
+ int backref;
+} tre_pos_and_tags_t;
+
+
+/***********************************************************************
+ from tre-ast.c and tre-ast.h
+***********************************************************************/
+
+/* The different AST node types. */
+typedef enum {
+ LITERAL,
+ CATENATION,
+ ITERATION,
+ UNION
+} tre_ast_type_t;
+
+/* Special subtypes of TRE_LITERAL. */
+#define EMPTY -1 /* Empty leaf (denotes empty string). */
+#define ASSERTION -2 /* Assertion leaf. */
+#define TAG -3 /* Tag leaf. */
+#define BACKREF -4 /* Back reference leaf. */
+
+#define IS_SPECIAL(x) ((x)->code_min < 0)
+#define IS_EMPTY(x) ((x)->code_min == EMPTY)
+#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
+#define IS_TAG(x) ((x)->code_min == TAG)
+#define IS_BACKREF(x) ((x)->code_min == BACKREF)
+
+
+/* A generic AST node. All AST nodes consist of this node on the top
+ level with `obj' pointing to the actual content. */
+typedef struct {
+ tre_ast_type_t type; /* Type of the node. */
+ void *obj; /* Pointer to actual node. */
+ int nullable;
+ int submatch_id;
+ int num_submatches;
+ int num_tags;
+ tre_pos_and_tags_t *firstpos;
+ tre_pos_and_tags_t *lastpos;
+} tre_ast_node_t;
+
+
+/* A "literal" node. These are created for assertions, back references,
+ tags, matching parameter settings, and all expressions that match one
+ character. */
+typedef struct {
+ long code_min;
+ long code_max;
+ int position;
+ tre_ctype_t class;
+ tre_ctype_t *neg_classes;
+} tre_literal_t;
+
+/* A "catenation" node. These are created when two regexps are concatenated.
+ If there are more than one subexpressions in sequence, the `left' part
+ holds all but the last, and `right' part holds the last subexpression
+ (catenation is left associative). */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_catenation_t;
+
+/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
+ operators. */
+typedef struct {
+ /* Subexpression to match. */
+ tre_ast_node_t *arg;
+ /* Minimum number of consecutive matches. */
+ int min;
+ /* Maximum number of consecutive matches. */
+ int max;
+ /* If 0, match as many characters as possible, if 1 match as few as
+ possible. Note that this does not always mean the same thing as
+ matching as many/few repetitions as possible. */
+ unsigned int minimal:1;
+} tre_iteration_t;
+
+/* An "union" node. These are created for the "|" operator. */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_union_t;
+
+
+static tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, int type, void *obj)
+{
+ tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
+ if (!node || !obj)
+ return 0;
+ node->obj = obj;
+ node->type = type;
+ node->nullable = -1;
+ node->submatch_id = -1;
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
+{
+ tre_ast_node_t *node;
+ tre_literal_t *lit;
+
+ lit = tre_mem_calloc(mem, sizeof *lit);
+ node = tre_ast_new_node(mem, LITERAL, lit);
+ if (!node)
+ return 0;
+ lit->code_min = code_min;
+ lit->code_max = code_max;
+ lit->position = position;
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
+{
+ tre_ast_node_t *node;
+ tre_iteration_t *iter;
+
+ iter = tre_mem_calloc(mem, sizeof *iter);
+ node = tre_ast_new_node(mem, ITERATION, iter);
+ if (!node)
+ return 0;
+ iter->arg = arg;
+ iter->min = min;
+ iter->max = max;
+ iter->minimal = minimal;
+ node->num_submatches = arg->num_submatches;
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
+{
+ tre_ast_node_t *node;
+ tre_union_t *un;
+
+ if (!left)
+ return right;
+ un = tre_mem_calloc(mem, sizeof *un);
+ node = tre_ast_new_node(mem, UNION, un);
+ if (!node || !right)
+ return 0;
+ un->left = left;
+ un->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
+{
+ tre_ast_node_t *node;
+ tre_catenation_t *cat;
+
+ if (!left)
+ return right;
+ cat = tre_mem_calloc(mem, sizeof *cat);
+ node = tre_ast_new_node(mem, CATENATION, cat);
+ if (!node)
+ return 0;
+ cat->left = left;
+ cat->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+ return node;
+}
+
+
+/***********************************************************************
+ from tre-stack.c and tre-stack.h
+***********************************************************************/
+
+typedef struct tre_stack_rec tre_stack_t;
+
+/* Creates a new stack object. `size' is initial size in bytes, `max_size'
+ is maximum size, and `increment' specifies how much more space will be
+ allocated with realloc() if all space gets used up. Returns the stack
+ object or NULL if out of memory. */
+static tre_stack_t *
+tre_stack_new(int size, int max_size, int increment);
+
+/* Frees the stack object. */
+static void
+tre_stack_destroy(tre_stack_t *s);
+
+/* Returns the current number of objects in the stack. */
+static int
+tre_stack_num_objects(tre_stack_t *s);
+
+/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
+ `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
+ This tries to realloc() more space before failing if maximum size
+ has not yet been reached. Returns REG_OK if successful. */
+#define declare_pushf(typetag, type) \
+ static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
+
+declare_pushf(voidptr, void *);
+declare_pushf(int, int);
+
+/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
+ element off of stack `s' and returns it. The stack must not be
+ empty. */
+#define declare_popf(typetag, type) \
+ static type tre_stack_pop_ ## typetag(tre_stack_t *s)
+
+declare_popf(voidptr, void *);
+declare_popf(int, int);
+
+/* Just to save some typing. */
+#define STACK_PUSH(s, typetag, value) \
+ do \
+ { \
+ status = tre_stack_push_ ## typetag(s, value); \
+ } \
+ while (/*CONSTCOND*/0)
+
+#define STACK_PUSHX(s, typetag, value) \
+ { \
+ status = tre_stack_push_ ## typetag(s, value); \
+ if (status != REG_OK) \
+ break; \
+ }
+
+#define STACK_PUSHR(s, typetag, value) \
+ { \
+ reg_errcode_t _status; \
+ _status = tre_stack_push_ ## typetag(s, value); \
+ if (_status != REG_OK) \
+ return _status; \
+ }
+
+union tre_stack_item {
+ void *voidptr_value;
+ int int_value;
+};
+
+struct tre_stack_rec {
+ int size;
+ int max_size;
+ int increment;
+ int ptr;
+ union tre_stack_item *stack;
+};
+
+
+static tre_stack_t *
+tre_stack_new(int size, int max_size, int increment)
+{
+ tre_stack_t *s;
+
+ s = xmalloc(sizeof(*s));
+ if (s != NULL)
+ {
+ s->stack = xmalloc(sizeof(*s->stack) * size);
+ if (s->stack == NULL)
+ {
+ xfree(s);
+ return NULL;
+ }
+ s->size = size;
+ s->max_size = max_size;
+ s->increment = increment;
+ s->ptr = 0;
+ }
+ return s;
+}
+
+static void
+tre_stack_destroy(tre_stack_t *s)
+{
+ xfree(s->stack);
+ xfree(s);
+}
+
+static int
+tre_stack_num_objects(tre_stack_t *s)
+{
+ return s->ptr;
+}
+
+static reg_errcode_t
+tre_stack_push(tre_stack_t *s, union tre_stack_item value)
+{
+ if (s->ptr < s->size)
+ {
+ s->stack[s->ptr] = value;
+ s->ptr++;
+ }
+ else
+ {
+ if (s->size >= s->max_size)
+ {
+ return REG_ESPACE;
+ }
+ else
+ {
+ union tre_stack_item *new_buffer;
+ int new_size;
+ new_size = s->size + s->increment;
+ if (new_size > s->max_size)
+ new_size = s->max_size;
+ new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
+ if (new_buffer == NULL)
+ {
+ return REG_ESPACE;
+ }
+ assert(new_size > s->size);
+ s->size = new_size;
+ s->stack = new_buffer;
+ tre_stack_push(s, value);
+ }
+ }
+ return REG_OK;
+}
+
+#define define_pushf(typetag, type) \
+ declare_pushf(typetag, type) { \
+ union tre_stack_item item; \
+ item.typetag ## _value = value; \
+ return tre_stack_push(s, item); \
+}
+
+define_pushf(int, int)
+define_pushf(voidptr, void *)
+
+#define define_popf(typetag, type) \
+ declare_popf(typetag, type) { \
+ return s->stack[--s->ptr].typetag ## _value; \
+ }
+
+define_popf(int, int)
+define_popf(voidptr, void *)
+
+
+/***********************************************************************
+ from tre-parse.c and tre-parse.h
+***********************************************************************/
+
+/* Parse context. */
+typedef struct {
+ /* Memory allocator. The AST is allocated using this. */
+ tre_mem_t mem;
+ /* Stack used for keeping track of regexp syntax. */
+ tre_stack_t *stack;
+ /* The parsed node after a parse function returns. */
+ tre_ast_node_t *n;
+ /* Position in the regexp pattern after a parse function returns. */
+ const char *s;
+ /* The first character of the last subexpression parsed. */
+ const char *start;
+ /* Current submatch ID. */
+ int submatch_id;
+ /* Current position (number of literal). */
+ int position;
+ /* The highest back reference or -1 if none seen so far. */
+ int max_backref;
+ /* Compilation flags. */
+ int cflags;
+} tre_parse_ctx_t;
+
+/* Some macros for expanding \w, \s, etc. */
+static const struct {
+ char c;
+ const char *expansion;
+} tre_macros[] = {
+ {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
+ {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
+ {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
+ {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
+ { 0, 0 }
+};
+
+/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
+ must have at least `len' items. Sets buf[0] to zero if the there
+ is no match in `tre_macros'. */
+static const char *tre_expand_macro(const char *s)
+{
+ int i;
+ for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
+ return tre_macros[i].expansion;
+}
+
+static int
+tre_compare_lit(const void *a, const void *b)
+{
+ const tre_literal_t *const *la = a;
+ const tre_literal_t *const *lb = b;
+ /* assumes the range of valid code_min is < INT_MAX */
+ return la[0]->code_min - lb[0]->code_min;
+}
+
+struct literals {
+ tre_mem_t mem;
+ tre_literal_t **a;
+ int len;
+ int cap;
+};
+
+static tre_literal_t *tre_new_lit(struct literals *p)
+{
+ tre_literal_t **a;
+ if (p->len >= p->cap) {
+ if (p->cap >= 1<<15)
+ return 0;
+ p->cap *= 2;
+ a = xrealloc(p->a, p->cap * sizeof *p->a);
+ if (!a)
+ return 0;
+ p->a = a;
+ }
+ a = p->a + p->len++;
+ *a = tre_mem_calloc(p->mem, sizeof **a);
+ return *a;
+}
+
+static int add_icase_literals(struct literals *ls, int min, int max)
+{
+ tre_literal_t *lit;
+ int b, e, c;
+ for (c=min; c<=max; ) {
+ /* assumes islower(c) and isupper(c) are exclusive
+ and toupper(c)!=c if islower(c).
+ multiple opposite case characters are not supported */
+ if (tre_islower(c)) {
+ b = e = tre_toupper(c);
+ for (c++, e++; c<=max; c++, e++)
+ if (tre_toupper(c) != e) break;
+ } else if (tre_isupper(c)) {
+ b = e = tre_tolower(c);
+ for (c++, e++; c<=max; c++, e++)
+ if (tre_tolower(c) != e) break;
+ } else {
+ c++;
+ continue;
+ }
+ lit = tre_new_lit(ls);
+ if (!lit)
+ return -1;
+ lit->code_min = b;
+ lit->code_max = e-1;
+ lit->position = -1;
+ }
+ return 0;
+}
+
+
+/* Maximum number of character classes in a negated bracket expression. */
+#define MAX_NEG_CLASSES 64
+
+struct neg {
+ int negate;
+ int len;
+ tre_ctype_t a[MAX_NEG_CLASSES];
+};
+
+// TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
+
+/*
+bracket grammar:
+Bracket = '[' List ']' | '[^' List ']'
+List = Term | List Term
+Term = Char | Range | Chclass | Eqclass
+Range = Char '-' Char | Char '-' '-'
+Char = Coll | coll_single
+Meta = ']' | '-'
+Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
+Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
+Chclass = '[:' class ':]'
+
+coll_single is a single char collating element but it can be
+ '-' only at the beginning or end of a List and
+ ']' only at the beginning of a List and
+ '^' anywhere except after the openning '['
+*/
+
+static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
+{
+ const char *start = s;
+ tre_ctype_t class;
+ int min, max;
+ wchar_t wc;
+ int len;
+
+ for (;;) {
+ class = 0;
+ len = mbtowc(&wc, s, -1);
+ if (len <= 0)
+ return *s ? REG_BADPAT : REG_EBRACK;
+ if (*s == ']' && s != start) {
+ ctx->s = s+1;
+ return REG_OK;
+ }
+ if (*s == '-' && s != start && s[1] != ']' &&
+ /* extension: [a-z--@] is accepted as [a-z]|[--@] */
+ (s[1] != '-' || s[2] == ']'))
+ return REG_ERANGE;
+ if (*s == '[' && (s[1] == '.' || s[1] == '='))
+ /* collating symbols and equivalence classes are not supported */
+ return REG_ECOLLATE;
+ if (*s == '[' && s[1] == ':') {
+ char tmp[CHARCLASS_NAME_MAX+1];
+ s += 2;
+ for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
+ if (s[len] == ':') {
+ memcpy(tmp, s, len);
+ tmp[len] = 0;
+ class = tre_ctype(tmp);
+ break;
+ }
+ }
+ if (!class || s[len+1] != ']')
+ return REG_ECTYPE;
+ min = 0;
+ max = TRE_CHAR_MAX;
+ s += len+2;
+ } else {
+ min = max = wc;
+ s += len;
+ if (*s == '-' && s[1] != ']') {
+ s++;
+ len = mbtowc(&wc, s, -1);
+ max = wc;
+ /* XXX - Should use collation order instead of
+ encoding values in character ranges. */
+ if (len <= 0 || min > max)
+ return REG_ERANGE;
+ s += len;
+ }
+ }
+
+ if (class && neg->negate) {
+ if (neg->len >= MAX_NEG_CLASSES)
+ return REG_ESPACE;
+ neg->a[neg->len++] = class;
+ } else {
+ tre_literal_t *lit = tre_new_lit(ls);
+ if (!lit)
+ return REG_ESPACE;
+ lit->code_min = min;
+ lit->code_max = max;
+ lit->class = class;
+ lit->position = -1;
+
+ /* Add opposite-case codepoints if REG_ICASE is present.
+ It seems that POSIX requires that bracket negation
+ should happen before case-folding, but most practical
+ implementations do it the other way around. Changing
+ the order would need efficient representation of
+ case-fold ranges and bracket range sets even with
+ simple patterns so this is ok for now. */
+ if (ctx->cflags & REG_ICASE && !class)
+ if (add_icase_literals(ls, min, max))
+ return REG_ESPACE;
+ }
+ }
+}
+
+static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
+{
+ int i, max, min, negmax, negmin;
+ tre_ast_node_t *node = 0, *n;
+ tre_ctype_t *nc = 0;
+ tre_literal_t *lit;
+ struct literals ls;
+ struct neg neg;
+ reg_errcode_t err;
+
+ ls.mem = ctx->mem;
+ ls.len = 0;
+ ls.cap = 32;
+ ls.a = xmalloc(ls.cap * sizeof *ls.a);
+ if (!ls.a)
+ return REG_ESPACE;
+ neg.len = 0;
+ neg.negate = *s == '^';
+ if (neg.negate)
+ s++;
+
+ err = parse_bracket_terms(ctx, s, &ls, &neg);
+ if (err != REG_OK)
+ goto parse_bracket_done;
+
+ if (neg.negate) {
+ /*
+ * With REG_NEWLINE, POSIX requires that newlines are not matched by
+ * any form of a non-matching list.
+ */
+ if (ctx->cflags & REG_NEWLINE) {
+ lit = tre_new_lit(&ls);
+ if (!lit) {
+ err = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ lit->code_min = '\n';
+ lit->code_max = '\n';
+ lit->position = -1;
+ }
+ /* Sort the array if we need to negate it. */
+ qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
+ /* extra lit for the last negated range */
+ lit = tre_new_lit(&ls);
+ if (!lit) {
+ err = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ lit->code_min = TRE_CHAR_MAX+1;
+ lit->code_max = TRE_CHAR_MAX+1;
+ lit->position = -1;
+ /* negated classes */
+ if (neg.len) {
+ nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
+ if (!nc) {
+ err = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ memcpy(nc, neg.a, neg.len*sizeof *neg.a);
+ nc[neg.len] = 0;
+ }
+ }
+
+ /* Build a union of the items in the array, negated if necessary. */
+ negmax = negmin = 0;
+ for (i = 0; i < ls.len; i++) {
+ lit = ls.a[i];
+ min = lit->code_min;
+ max = lit->code_max;
+ if (neg.negate) {
+ if (min <= negmin) {
+ /* Overlap. */
+ negmin = MAX(max + 1, negmin);
+ continue;
+ }
+ negmax = min - 1;
+ lit->code_min = negmin;
+ lit->code_max = negmax;
+ negmin = max + 1;
+ }
+ lit->position = ctx->position;
+ lit->neg_classes = nc;
+ n = tre_ast_new_node(ctx->mem, LITERAL, lit);
+ node = tre_ast_new_union(ctx->mem, node, n);
+ if (!node) {
+ err = REG_ESPACE;
+ break;
+ }
+ }
+
+parse_bracket_done:
+ xfree(ls.a);
+ ctx->position++;
+ ctx->n = node;
+ return err;
+}
+
+static const char *parse_dup_count(const char *s, int *n)
+{
+ *n = -1;
+ if (!isdigit(*s))
+ return s;
+ *n = 0;
+ for (;;) {
+ *n = 10 * *n + (*s - '0');
+ s++;
+ if (!isdigit(*s) || *n > RE_DUP_MAX)
+ break;
+ }
+ return s;
+}
+
+static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
+{
+ int min, max;
+
+ s = parse_dup_count(s, &min);
+ if (*s == ',')
+ s = parse_dup_count(s+1, &max);
+ else
+ max = min;
+
+ if (
+ (max < min && max >= 0) ||
+ max > RE_DUP_MAX ||
+ min > RE_DUP_MAX ||
+ min < 0 ||
+ (!ere && *s++ != '\\') ||
+ *s++ != '}'
+ )
+ return 0;
+ *pmin = min;
+ *pmax = max;
+ return s;
+}
+
+static int hexval(unsigned c)
+{
+ if (c-'0'<10) return c-'0';
+ c |= 32;
+ if (c-'a'<6) return c-'a'+10;
+ return -1;
+}
+
+static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
+{
+ if (node->submatch_id >= 0) {
+ tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (!n)
+ return REG_ESPACE;
+ n = tre_ast_new_catenation(ctx->mem, n, node);
+ if (!n)
+ return REG_ESPACE;
+ n->num_submatches = node->num_submatches;
+ node = n;
+ }
+ node->submatch_id = subid;
+ node->num_submatches++;
+ ctx->n = node;
+ return REG_OK;
+}
+
+/*
+BRE grammar:
+Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
+Branch = Atom | Branch Atom
+Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
+Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
+
+(leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
+
+ERE grammar:
+Regex = Branch | Regex '|' Branch
+Branch = Atom | Branch Atom
+Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
+Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
+
+(a*+?, ^*, $+, \X, {, (|a) are unspecified)
+*/
+
+static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
+{
+ int len, ere = ctx->cflags & REG_EXTENDED;
+ const char *p;
+ tre_ast_node_t *node;
+ wchar_t wc;
+ switch (*s) {
+ case '[':
+ return parse_bracket(ctx, s+1);
+ case '\\':
+ p = tre_expand_macro(s+1);
+ if (p) {
+ /* assume \X expansion is a single atom */
+ reg_errcode_t err = parse_atom(ctx, p);
+ ctx->s = s+2;
+ return err;
+ }
+ /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
+ switch (*++s) {
+ case 0:
+ return REG_EESCAPE;
+ case 'b':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
+ break;
+ case 'B':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
+ break;
+ case '<':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
+ break;
+ case '>':
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
+ break;
+ case 'x':
+ s++;
+ int i, v = 0, c;
+ len = 2;
+ if (*s == '{') {
+ len = 8;
+ s++;
+ }
+ for (i=0; i<len && v<0x110000; i++) {
+ c = hexval(s[i]);
+ if (c < 0) break;
+ v = 16*v + c;
+ }
+ s += i;
+ if (len == 8) {
+ if (*s != '}')
+ return REG_EBRACE;
+ s++;
+ }
+ node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
+ s--;
+ break;
+ case '{':
+ case '+':
+ case '?':
+ /* extension: treat \+, \? as repetitions in BRE */
+ /* reject repetitions after empty expression in BRE */
+ if (!ere)
+ return REG_BADRPT;
+ case '|':
+ /* extension: treat \| as alternation in BRE */
+ if (!ere) {
+ node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ s--;
+ goto end;
+ }
+ /* fallthrough */
+ default:
+ if (!ere && (unsigned)*s-'1' < 9) {
+ /* back reference */
+ int val = *s - '0';
+ node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
+ ctx->max_backref = MAX(val, ctx->max_backref);
+ } else {
+ /* extension: accept unknown escaped char
+ as a literal */
+ goto parse_literal;
+ }
+ }
+ s++;
+ break;
+ case '.':
+ if (ctx->cflags & REG_NEWLINE) {
+ tre_ast_node_t *tmp1, *tmp2;
+ tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
+ tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
+ if (tmp1 && tmp2)
+ node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+ else
+ node = 0;
+ } else {
+ node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
+ }
+ s++;
+ break;
+ case '^':
+ /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
+ if (!ere && s != ctx->start)
+ goto parse_literal;
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
+ s++;
+ break;
+ case '$':
+ /* '$' is special everywhere in EREs, and at the end of a BRE subexpression. */
+ if (!ere && s[1] && (s[1]!='\\'|| (s[2]!=')' && s[2]!='|')))
+ goto parse_literal;
+ node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
+ s++;
+ break;
+ case '*':
+ case '{':
+ case '+':
+ case '?':
+ /* reject repetitions after empty expression in ERE */
+ if (ere)
+ return REG_BADRPT;
+ case '|':
+ if (!ere)
+ goto parse_literal;
+ case 0:
+ node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ break;
+ default:
+parse_literal:
+ len = mbtowc(&wc, s, -1);
+ if (len < 0)
+ return REG_BADPAT;
+ if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
+ tre_ast_node_t *tmp1, *tmp2;
+ /* multiple opposite case characters are not supported */
+ tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
+ tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
+ if (tmp1 && tmp2)
+ node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+ else
+ node = 0;
+ } else {
+ node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
+ }
+ ctx->position++;
+ s += len;
+ break;
+ }
+end:
+ if (!node)
+ return REG_ESPACE;
+ ctx->n = node;
+ ctx->s = s;
+ return REG_OK;
+}
+
+#define PUSHPTR(err, s, v) do { \
+ if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
+ return err; \
+} while(0)
+
+#define PUSHINT(err, s, v) do { \
+ if ((err = tre_stack_push_int(s, v)) != REG_OK) \
+ return err; \
+} while(0)
+
+static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
+{
+ tre_ast_node_t *nbranch=0, *nunion=0;
+ int ere = ctx->cflags & REG_EXTENDED;
+ const char *s = ctx->start;
+ int subid = 0;
+ int depth = 0;
+ reg_errcode_t err;
+ tre_stack_t *stack = ctx->stack;
+
+ PUSHINT(err, stack, subid++);
+ for (;;) {
+ if ((!ere && *s == '\\' && s[1] == '(') ||
+ (ere && *s == '(')) {
+ PUSHPTR(err, stack, nunion);
+ PUSHPTR(err, stack, nbranch);
+ PUSHINT(err, stack, subid++);
+ s++;
+ if (!ere)
+ s++;
+ depth++;
+ nbranch = nunion = 0;
+ ctx->start = s;
+ continue;
+ }
+ if ((!ere && *s == '\\' && s[1] == ')') ||
+ (ere && *s == ')' && depth)) {
+ ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (!ctx->n)
+ return REG_ESPACE;
+ } else {
+ err = parse_atom(ctx, s);
+ if (err != REG_OK)
+ return err;
+ s = ctx->s;
+ }
+
+ parse_iter:
+ for (;;) {
+ int min, max;
+
+ if (*s!='\\' && *s!='*') {
+ if (!ere)
+ break;
+ if (*s!='+' && *s!='?' && *s!='{')
+ break;
+ }
+ if (*s=='\\' && ere)
+ break;
+ /* extension: treat \+, \? as repetitions in BRE */
+ if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
+ break;
+ if (*s=='\\')
+ s++;
+
+ /* handle ^* at the start of a BRE. */
+ if (!ere && s==ctx->start+1 && s[-1]=='^')
+ break;
+
+ /* extension: multiple consecutive *+?{,} is unspecified,
+ but (a+)+ has to be supported so accepting a++ makes
+ sense, note however that the RE_DUP_MAX limit can be
+ circumvented: (a{255}){255} uses a lot of memory.. */
+ if (*s=='{') {
+ s = parse_dup(s+1, ere, &min, &max);
+ if (!s)
+ return REG_BADBR;
+ } else {
+ min=0;
+ max=-1;
+ if (*s == '+')
+ min = 1;
+ if (*s == '?')
+ max = 1;
+ s++;
+ }
+ if (max == 0)
+ ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ else
+ ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
+ if (!ctx->n)
+ return REG_ESPACE;
+ }
+
+ nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
+ if ((ere && *s == '|') ||
+ (ere && *s == ')' && depth) ||
+ (!ere && *s == '\\' && s[1] == ')') ||
+ /* extension: treat \| as alternation in BRE */
+ (!ere && *s == '\\' && s[1] == '|') ||
+ !*s) {
+ /* extension: empty branch is unspecified (), (|a), (a|)
+ here they are not rejected but match on empty string */
+ int c = *s;
+ nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
+ nbranch = 0;
+
+ if (c == '\\' && s[1] == '|') {
+ s+=2;
+ ctx->start = s;
+ } else if (c == '|') {
+ s++;
+ ctx->start = s;
+ } else {
+ if (c == '\\') {
+ if (!depth) return REG_EPAREN;
+ s+=2;
+ } else if (c == ')')
+ s++;
+ depth--;
+ err = marksub(ctx, nunion, tre_stack_pop_int(stack));
+ if (err != REG_OK)
+ return err;
+ if (!c && depth<0) {
+ ctx->submatch_id = subid;
+ return REG_OK;
+ }
+ if (!c || depth<0)
+ return REG_EPAREN;
+ nbranch = tre_stack_pop_voidptr(stack);
+ nunion = tre_stack_pop_voidptr(stack);
+ goto parse_iter;
+ }
+ }
+ }
+}
+
+
+/***********************************************************************
+ from tre-compile.c
+***********************************************************************/
+
+
+/*
+ TODO:
+ - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
+ function calls.
+*/
+
+/*
+ Algorithms to setup tags so that submatch addressing can be done.
+*/
+
+
+/* Inserts a catenation node to the root of the tree given in `node'.
+ As the left child a new tag with number `tag_id' to `node' is added,
+ and the right child is the old root. */
+static reg_errcode_t
+tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
+{
+ tre_catenation_t *c;
+
+ c = tre_mem_alloc(mem, sizeof(*c));
+ if (c == NULL)
+ return REG_ESPACE;
+ c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
+ if (c->left == NULL)
+ return REG_ESPACE;
+ c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
+ if (c->right == NULL)
+ return REG_ESPACE;
+
+ c->right->obj = node->obj;
+ c->right->type = node->type;
+ c->right->nullable = -1;
+ c->right->submatch_id = -1;
+ c->right->firstpos = NULL;
+ c->right->lastpos = NULL;
+ c->right->num_tags = 0;
+ c->right->num_submatches = 0;
+ node->obj = c;
+ node->type = CATENATION;
+ return REG_OK;
+}
+
+/* Inserts a catenation node to the root of the tree given in `node'.
+ As the right child a new tag with number `tag_id' to `node' is added,
+ and the left child is the old root. */
+static reg_errcode_t
+tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
+{
+ tre_catenation_t *c;
+
+ c = tre_mem_alloc(mem, sizeof(*c));
+ if (c == NULL)
+ return REG_ESPACE;
+ c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
+ if (c->right == NULL)
+ return REG_ESPACE;
+ c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
+ if (c->left == NULL)
+ return REG_ESPACE;
+
+ c->left->obj = node->obj;
+ c->left->type = node->type;
+ c->left->nullable = -1;
+ c->left->submatch_id = -1;
+ c->left->firstpos = NULL;
+ c->left->lastpos = NULL;
+ c->left->num_tags = 0;
+ c->left->num_submatches = 0;
+ node->obj = c;
+ node->type = CATENATION;
+ return REG_OK;
+}
+
+typedef enum {
+ ADDTAGS_RECURSE,
+ ADDTAGS_AFTER_ITERATION,
+ ADDTAGS_AFTER_UNION_LEFT,
+ ADDTAGS_AFTER_UNION_RIGHT,
+ ADDTAGS_AFTER_CAT_LEFT,
+ ADDTAGS_AFTER_CAT_RIGHT,
+ ADDTAGS_SET_SUBMATCH_END
+} tre_addtags_symbol_t;
+
+
+typedef struct {
+ int tag;
+ int next_tag;
+} tre_tag_states_t;
+
+
+/* Go through `regset' and set submatch data for submatches that are
+ using this tag. */
+static void
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
+{
+ int i;
+
+ for (i = 0; regset[i] >= 0; i++)
+ {
+ int id = regset[i] / 2;
+ int start = !(regset[i] % 2);
+ if (start)
+ tnfa->submatch_data[id].so_tag = tag;
+ else
+ tnfa->submatch_data[id].eo_tag = tag;
+ }
+ regset[0] = -1;
+}
+
+
+/* Adds tags to appropriate locations in the parse tree in `tree', so that
+ subexpressions marked for submatch addressing can be traced. */
+static reg_errcode_t
+tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
+ tre_tnfa_t *tnfa)
+{
+ reg_errcode_t status = REG_OK;
+ tre_addtags_symbol_t symbol;
+ tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
+ int bottom = tre_stack_num_objects(stack);
+ /* True for first pass (counting number of needed tags) */
+ int first_pass = (mem == NULL || tnfa == NULL);
+ int *regset, *orig_regset;
+ int num_tags = 0; /* Total number of tags. */
+ int num_minimals = 0; /* Number of special minimal tags. */
+ int tag = 0; /* The tag that is to be added next. */
+ int next_tag = 1; /* Next tag to use after this one. */
+ int *parents; /* Stack of submatches the current submatch is
+ contained in. */
+ int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
+ tre_tag_states_t *saved_states;
+
+ tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
+ if (!first_pass)
+ {
+ tnfa->end_tag = 0;
+ tnfa->minimal_tags[0] = -1;
+ }
+
+ regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
+ if (regset == NULL)
+ return REG_ESPACE;
+ regset[0] = -1;
+ orig_regset = regset;
+
+ parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
+ if (parents == NULL)
+ {
+ xfree(regset);
+ return REG_ESPACE;
+ }
+ parents[0] = -1;
+
+ saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
+ if (saved_states == NULL)
+ {
+ xfree(regset);
+ xfree(parents);
+ return REG_ESPACE;
+ }
+ else
+ {
+ unsigned int i;
+ for (i = 0; i <= tnfa->num_submatches; i++)
+ saved_states[i].tag = -1;
+ }
+
+ STACK_PUSH(stack, voidptr, node);
+ STACK_PUSH(stack, int, ADDTAGS_RECURSE);
+
+ while (tre_stack_num_objects(stack) > bottom)
+ {
+ if (status != REG_OK)
+ break;
+
+ symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
+ switch (symbol)
+ {
+
+ case ADDTAGS_SET_SUBMATCH_END:
+ {
+ int id = tre_stack_pop_int(stack);
+ int i;
+
+ /* Add end of this submatch to regset. */
+ for (i = 0; regset[i] >= 0; i++);
+ regset[i] = id * 2 + 1;
+ regset[i + 1] = -1;
+
+ /* Pop this submatch from the parents stack. */
+ for (i = 0; parents[i] >= 0; i++);
+ parents[i - 1] = -1;
+ break;
+ }
+
+ case ADDTAGS_RECURSE:
+ node = tre_stack_pop_voidptr(stack);
+
+ if (node->submatch_id >= 0)
+ {
+ int id = node->submatch_id;
+ int i;
+
+
+ /* Add start of this submatch to regset. */
+ for (i = 0; regset[i] >= 0; i++);
+ regset[i] = id * 2;
+ regset[i + 1] = -1;
+
+ if (!first_pass)
+ {
+ for (i = 0; parents[i] >= 0; i++);
+ tnfa->submatch_data[id].parents = NULL;
+ if (i > 0)
+ {
+ int *p = xmalloc(sizeof(*p) * (i + 1));
+ if (p == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ assert(tnfa->submatch_data[id].parents == NULL);
+ tnfa->submatch_data[id].parents = p;
+ for (i = 0; parents[i] >= 0; i++)
+ p[i] = parents[i];
+ p[i] = -1;
+ }
+ }
+
+ /* Add end of this submatch to regset after processing this
+ node. */
+ STACK_PUSHX(stack, int, node->submatch_id);
+ STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
+ }
+
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit = node->obj;
+
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+ {
+ int i;
+ if (regset[0] >= 0)
+ {
+ /* Regset is not empty, so add a tag before the
+ literal or backref. */
+ if (!first_pass)
+ {
+ status = tre_add_tag_left(mem, node, tag);
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0)
+ {
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+ else
+ {
+ node->num_tags = 1;
+ }
+
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+ }
+ else
+ {
+ assert(!IS_TAG(lit));
+ }
+ break;
+ }
+ case CATENATION:
+ {
+ tre_catenation_t *cat = node->obj;
+ tre_ast_node_t *left = cat->left;
+ tre_ast_node_t *right = cat->right;
+ int reserved_tag = -1;
+
+
+ /* After processing right child. */
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
+
+ /* Process right child. */
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* After processing left child. */
+ STACK_PUSHX(stack, int, next_tag + left->num_tags);
+ if (left->num_tags > 0 && right->num_tags > 0)
+ {
+ /* Reserve the next tag to the right child. */
+ reserved_tag = next_tag;
+ next_tag++;
+ }
+ STACK_PUSHX(stack, int, reserved_tag);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
+
+ /* Process left child. */
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ }
+ break;
+ case ITERATION:
+ {
+ tre_iteration_t *iter = node->obj;
+
+ if (first_pass)
+ {
+ STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
+ }
+ else
+ {
+ STACK_PUSHX(stack, int, tag);
+ STACK_PUSHX(stack, int, iter->minimal);
+ }
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
+
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* Regset is not empty, so add a tag here. */
+ if (regset[0] >= 0 || iter->minimal)
+ {
+ if (!first_pass)
+ {
+ int i;
+ status = tre_add_tag_left(mem, node, tag);
+ if (iter->minimal)
+ tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
+ else
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0)
+ {
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+ direction = TRE_TAG_MINIMIZE;
+ }
+ break;
+ case UNION:
+ {
+ tre_union_t *uni = node->obj;
+ tre_ast_node_t *left = uni->left;
+ tre_ast_node_t *right = uni->right;
+ int left_tag;
+ int right_tag;
+
+ if (regset[0] >= 0)
+ {
+ left_tag = next_tag;
+ right_tag = next_tag + 1;
+ }
+ else
+ {
+ left_tag = tag;
+ right_tag = next_tag;
+ }
+
+ /* After processing right child. */
+ STACK_PUSHX(stack, int, right_tag);
+ STACK_PUSHX(stack, int, left_tag);
+ STACK_PUSHX(stack, voidptr, regset);
+ STACK_PUSHX(stack, int, regset[0] >= 0);
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
+
+ /* Process right child. */
+ STACK_PUSHX(stack, voidptr, right);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* After processing left child. */
+ STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
+
+ /* Process left child. */
+ STACK_PUSHX(stack, voidptr, left);
+ STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
+
+ /* Regset is not empty, so add a tag here. */
+ if (regset[0] >= 0)
+ {
+ if (!first_pass)
+ {
+ int i;
+ status = tre_add_tag_left(mem, node, tag);
+ tnfa->tag_directions[tag] = direction;
+ if (minimal_tag >= 0)
+ {
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+ tre_purge_regset(regset, tnfa, tag);
+ }
+
+ regset[0] = -1;
+ tag = next_tag;
+ num_tags++;
+ next_tag++;
+ }
+
+ if (node->num_submatches > 0)
+ {
+ /* The next two tags are reserved for markers. */
+ next_tag++;
+ tag = next_tag;
+ next_tag++;
+ }
+
+ break;
+ }
+ }
+
+ if (node->submatch_id >= 0)
+ {
+ int i;
+ /* Push this submatch on the parents stack. */
+ for (i = 0; parents[i] >= 0; i++);
+ parents[i] = node->submatch_id;
+ parents[i + 1] = -1;
+ }
+
+ break; /* end case: ADDTAGS_RECURSE */
+
+ case ADDTAGS_AFTER_ITERATION:
+ {
+ int minimal = 0;
+ int enter_tag;
+ node = tre_stack_pop_voidptr(stack);
+ if (first_pass)
+ {
+ node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
+ + tre_stack_pop_int(stack);
+ minimal_tag = -1;
+ }
+ else
+ {
+ minimal = tre_stack_pop_int(stack);
+ enter_tag = tre_stack_pop_int(stack);
+ if (minimal)
+ minimal_tag = enter_tag;
+ }
+
+ if (!first_pass)
+ {
+ if (minimal)
+ direction = TRE_TAG_MINIMIZE;
+ else
+ direction = TRE_TAG_MAXIMIZE;
+ }
+ break;
+ }
+
+ case ADDTAGS_AFTER_CAT_LEFT:
+ {
+ int new_tag = tre_stack_pop_int(stack);
+ next_tag = tre_stack_pop_int(stack);
+ if (new_tag >= 0)
+ {
+ tag = new_tag;
+ }
+ break;
+ }
+
+ case ADDTAGS_AFTER_CAT_RIGHT:
+ node = tre_stack_pop_voidptr(stack);
+ if (first_pass)
+ node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
+ + ((tre_catenation_t *)node->obj)->right->num_tags;
+ break;
+
+ case ADDTAGS_AFTER_UNION_LEFT:
+ /* Lift the bottom of the `regset' array so that when processing
+ the right operand the items currently in the array are
+ invisible. The original bottom was saved at ADDTAGS_UNION and
+ will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
+ while (*regset >= 0)
+ regset++;
+ break;
+
+ case ADDTAGS_AFTER_UNION_RIGHT:
+ {
+ int added_tags, tag_left, tag_right;
+ tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
+ tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
+ node = tre_stack_pop_voidptr(stack);
+ added_tags = tre_stack_pop_int(stack);
+ if (first_pass)
+ {
+ node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
+ + ((tre_union_t *)node->obj)->right->num_tags + added_tags
+ + ((node->num_submatches > 0) ? 2 : 0);
+ }
+ regset = tre_stack_pop_voidptr(stack);
+ tag_left = tre_stack_pop_int(stack);
+ tag_right = tre_stack_pop_int(stack);
+
+ /* Add tags after both children, the left child gets a smaller
+ tag than the right child. This guarantees that we prefer
+ the left child over the right child. */
+ /* XXX - This is not always necessary (if the children have
+ tags which must be seen for every match of that child). */
+ /* XXX - Check if this is the only place where tre_add_tag_right
+ is used. If so, use tre_add_tag_left (putting the tag before
+ the child as opposed after the child) and throw away
+ tre_add_tag_right. */
+ if (node->num_submatches > 0)
+ {
+ if (!first_pass)
+ {
+ status = tre_add_tag_right(mem, left, tag_left);
+ tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
+ if (status == REG_OK)
+ status = tre_add_tag_right(mem, right, tag_right);
+ tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
+ }
+ num_tags += 2;
+ }
+ direction = TRE_TAG_MAXIMIZE;
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+
+ } /* end switch(symbol) */
+ } /* end while(tre_stack_num_objects(stack) > bottom) */
+
+ if (!first_pass)
+ tre_purge_regset(regset, tnfa, tag);
+
+ if (!first_pass && minimal_tag >= 0)
+ {
+ int i;
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
+ tnfa->minimal_tags[i] = tag;
+ tnfa->minimal_tags[i + 1] = minimal_tag;
+ tnfa->minimal_tags[i + 2] = -1;
+ minimal_tag = -1;
+ num_minimals++;
+ }
+
+ assert(tree->num_tags == num_tags);
+ tnfa->end_tag = num_tags;
+ tnfa->num_tags = num_tags;
+ tnfa->num_minimals = num_minimals;
+ xfree(orig_regset);
+ xfree(parents);
+ xfree(saved_states);
+ return status;
+}
+
+
+
+/*
+ AST to TNFA compilation routines.
+*/
+
+typedef enum {
+ COPY_RECURSE,
+ COPY_SET_RESULT_PTR
+} tre_copyast_symbol_t;
+
+/* Flags for tre_copy_ast(). */
+#define COPY_REMOVE_TAGS 1
+#define COPY_MAXIMIZE_FIRST_TAG 2
+
+static reg_errcode_t
+tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
+ int flags, int *pos_add, tre_tag_direction_t *tag_directions,
+ tre_ast_node_t **copy, int *max_pos)
+{
+ reg_errcode_t status = REG_OK;
+ int bottom = tre_stack_num_objects(stack);
+ int num_copied = 0;
+ int first_tag = 1;
+ tre_ast_node_t **result = copy;
+ tre_copyast_symbol_t symbol;
+
+ STACK_PUSH(stack, voidptr, ast);
+ STACK_PUSH(stack, int, COPY_RECURSE);
+
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+ {
+ tre_ast_node_t *node;
+ if (status != REG_OK)
+ break;
+
+ symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
+ switch (symbol)
+ {
+ case COPY_SET_RESULT_PTR:
+ result = tre_stack_pop_voidptr(stack);
+ break;
+ case COPY_RECURSE:
+ node = tre_stack_pop_voidptr(stack);
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit = node->obj;
+ int pos = lit->position;
+ int min = lit->code_min;
+ int max = lit->code_max;
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+ {
+ /* XXX - e.g. [ab] has only one position but two
+ nodes, so we are creating holes in the state space
+ here. Not fatal, just wastes memory. */
+ pos += *pos_add;
+ num_copied++;
+ }
+ else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
+ {
+ /* Change this tag to empty. */
+ min = EMPTY;
+ max = pos = -1;
+ }
+ else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
+ && first_tag)
+ {
+ /* Maximize the first tag. */
+ tag_directions[max] = TRE_TAG_MAXIMIZE;
+ first_tag = 0;
+ }
+ *result = tre_ast_new_literal(mem, min, max, pos);
+ if (*result == NULL)
+ status = REG_ESPACE;
+ else {
+ tre_literal_t *p = (*result)->obj;
+ p->class = lit->class;
+ p->neg_classes = lit->neg_classes;
+ }
+
+ if (pos > *max_pos)
+ *max_pos = pos;
+ break;
+ }
+ case UNION:
+ {
+ tre_union_t *uni = node->obj;
+ tre_union_t *tmp;
+ *result = tre_ast_new_union(mem, uni->left, uni->right);
+ if (*result == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ tmp = (*result)->obj;
+ result = &tmp->left;
+ STACK_PUSHX(stack, voidptr, uni->right);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ STACK_PUSHX(stack, voidptr, &tmp->right);
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+ STACK_PUSHX(stack, voidptr, uni->left);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ break;
+ }
+ case CATENATION:
+ {
+ tre_catenation_t *cat = node->obj;
+ tre_catenation_t *tmp;
+ *result = tre_ast_new_catenation(mem, cat->left, cat->right);
+ if (*result == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ tmp = (*result)->obj;
+ tmp->left = NULL;
+ tmp->right = NULL;
+ result = &tmp->left;
+
+ STACK_PUSHX(stack, voidptr, cat->right);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ STACK_PUSHX(stack, voidptr, &tmp->right);
+ STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ break;
+ }
+ case ITERATION:
+ {
+ tre_iteration_t *iter = node->obj;
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, COPY_RECURSE);
+ *result = tre_ast_new_iter(mem, iter->arg, iter->min,
+ iter->max, iter->minimal);
+ if (*result == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ iter = (*result)->obj;
+ result = &iter->arg;
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ }
+ }
+ *pos_add += num_copied;
+ return status;
+}
+
+typedef enum {
+ EXPAND_RECURSE,
+ EXPAND_AFTER_ITER
+} tre_expand_ast_symbol_t;
+
+/* Expands each iteration node that has a finite nonzero minimum or maximum
+ iteration count to a catenated sequence of copies of the node. */
+static reg_errcode_t
+tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
+ int *position, tre_tag_direction_t *tag_directions)
+{
+ reg_errcode_t status = REG_OK;
+ int bottom = tre_stack_num_objects(stack);
+ int pos_add = 0;
+ int pos_add_total = 0;
+ int max_pos = 0;
+ int iter_depth = 0;
+
+ STACK_PUSHR(stack, voidptr, ast);
+ STACK_PUSHR(stack, int, EXPAND_RECURSE);
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+ {
+ tre_ast_node_t *node;
+ tre_expand_ast_symbol_t symbol;
+
+ if (status != REG_OK)
+ break;
+
+ symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
+ node = tre_stack_pop_voidptr(stack);
+ switch (symbol)
+ {
+ case EXPAND_RECURSE:
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit= node->obj;
+ if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
+ {
+ lit->position += pos_add;
+ if (lit->position > max_pos)
+ max_pos = lit->position;
+ }
+ break;
+ }
+ case UNION:
+ {
+ tre_union_t *uni = node->obj;
+ STACK_PUSHX(stack, voidptr, uni->right);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ STACK_PUSHX(stack, voidptr, uni->left);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ break;
+ }
+ case CATENATION:
+ {
+ tre_catenation_t *cat = node->obj;
+ STACK_PUSHX(stack, voidptr, cat->right);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ break;
+ }
+ case ITERATION:
+ {
+ tre_iteration_t *iter = node->obj;
+ STACK_PUSHX(stack, int, pos_add);
+ STACK_PUSHX(stack, voidptr, node);
+ STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ STACK_PUSHX(stack, int, EXPAND_RECURSE);
+ /* If we are going to expand this node at EXPAND_AFTER_ITER
+ then don't increase the `pos' fields of the nodes now, it
+ will get done when expanding. */
+ if (iter->min > 1 || iter->max > 1)
+ pos_add = 0;
+ iter_depth++;
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case EXPAND_AFTER_ITER:
+ {
+ tre_iteration_t *iter = node->obj;
+ int pos_add_last;
+ pos_add = tre_stack_pop_int(stack);
+ pos_add_last = pos_add;
+ if (iter->min > 1 || iter->max > 1)
+ {
+ tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
+ int j;
+ int pos_add_save = pos_add;
+
+ /* Create a catenated sequence of copies of the node. */
+ for (j = 0; j < iter->min; j++)
+ {
+ tre_ast_node_t *copy;
+ /* Remove tags from all but the last copy. */
+ int flags = ((j + 1 < iter->min)
+ ? COPY_REMOVE_TAGS
+ : COPY_MAXIMIZE_FIRST_TAG);
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, flags,
+ &pos_add, tag_directions, &copy,
+ &max_pos);
+ if (status != REG_OK)
+ return status;
+ if (seq1 != NULL)
+ seq1 = tre_ast_new_catenation(mem, seq1, copy);
+ else
+ seq1 = copy;
+ if (seq1 == NULL)
+ return REG_ESPACE;
+ }
+
+ if (iter->max == -1)
+ {
+ /* No upper limit. */
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, 0,
+ &pos_add, NULL, &seq2, &max_pos);
+ if (status != REG_OK)
+ return status;
+ seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ }
+ else
+ {
+ for (j = iter->min; j < iter->max; j++)
+ {
+ tre_ast_node_t *tmp, *copy;
+ pos_add_save = pos_add;
+ status = tre_copy_ast(mem, stack, iter->arg, 0,
+ &pos_add, NULL, &copy, &max_pos);
+ if (status != REG_OK)
+ return status;
+ if (seq2 != NULL)
+ seq2 = tre_ast_new_catenation(mem, copy, seq2);
+ else
+ seq2 = copy;
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
+ if (tmp == NULL)
+ return REG_ESPACE;
+ seq2 = tre_ast_new_union(mem, tmp, seq2);
+ if (seq2 == NULL)
+ return REG_ESPACE;
+ }
+ }
+
+ pos_add = pos_add_save;
+ if (seq1 == NULL)
+ seq1 = seq2;
+ else if (seq2 != NULL)
+ seq1 = tre_ast_new_catenation(mem, seq1, seq2);
+ if (seq1 == NULL)
+ return REG_ESPACE;
+ node->obj = seq1->obj;
+ node->type = seq1->type;
+ }
+
+ iter_depth--;
+ pos_add_total += pos_add - pos_add_last;
+ if (iter_depth == 0)
+ pos_add = pos_add_total;
+
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ *position += pos_add_total;
+
+ /* `max_pos' should never be larger than `*position' if the above
+ code works, but just an extra safeguard let's make sure
+ `*position' is set large enough so enough memory will be
+ allocated for the transition table. */
+ if (max_pos > *position)
+ *position = max_pos;
+
+ return status;
+}
+
+static tre_pos_and_tags_t *
+tre_set_empty(tre_mem_t mem)
+{
+ tre_pos_and_tags_t *new_set;
+
+ new_set = tre_mem_calloc(mem, sizeof(*new_set));
+ if (new_set == NULL)
+ return NULL;
+
+ new_set[0].position = -1;
+ new_set[0].code_min = -1;
+ new_set[0].code_max = -1;
+
+ return new_set;
+}
+
+static tre_pos_and_tags_t *
+tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
+ tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
+{
+ tre_pos_and_tags_t *new_set;
+
+ new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
+ if (new_set == NULL)
+ return NULL;
+
+ new_set[0].position = position;
+ new_set[0].code_min = code_min;
+ new_set[0].code_max = code_max;
+ new_set[0].class = class;
+ new_set[0].neg_classes = neg_classes;
+ new_set[0].backref = backref;
+ new_set[1].position = -1;
+ new_set[1].code_min = -1;
+ new_set[1].code_max = -1;
+
+ return new_set;
+}
+
+static tre_pos_and_tags_t *
+tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
+ int *tags, int assertions)
+{
+ int s1, s2, i, j;
+ tre_pos_and_tags_t *new_set;
+ int *new_tags;
+ int num_tags;
+
+ for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
+ for (s1 = 0; set1[s1].position >= 0; s1++);
+ for (s2 = 0; set2[s2].position >= 0; s2++);
+ new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
+ if (!new_set )
+ return NULL;
+
+ for (s1 = 0; set1[s1].position >= 0; s1++)
+ {
+ new_set[s1].position = set1[s1].position;
+ new_set[s1].code_min = set1[s1].code_min;
+ new_set[s1].code_max = set1[s1].code_max;
+ new_set[s1].assertions = set1[s1].assertions | assertions;
+ new_set[s1].class = set1[s1].class;
+ new_set[s1].neg_classes = set1[s1].neg_classes;
+ new_set[s1].backref = set1[s1].backref;
+ if (set1[s1].tags == NULL && tags == NULL)
+ new_set[s1].tags = NULL;
+ else
+ {
+ for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
+ new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
+ * (i + num_tags + 1)));
+ if (new_tags == NULL)
+ return NULL;
+ for (j = 0; j < i; j++)
+ new_tags[j] = set1[s1].tags[j];
+ for (i = 0; i < num_tags; i++)
+ new_tags[j + i] = tags[i];
+ new_tags[j + i] = -1;
+ new_set[s1].tags = new_tags;
+ }
+ }
+
+ for (s2 = 0; set2[s2].position >= 0; s2++)
+ {
+ new_set[s1 + s2].position = set2[s2].position;
+ new_set[s1 + s2].code_min = set2[s2].code_min;
+ new_set[s1 + s2].code_max = set2[s2].code_max;
+ /* XXX - why not | assertions here as well? */
+ new_set[s1 + s2].assertions = set2[s2].assertions;
+ new_set[s1 + s2].class = set2[s2].class;
+ new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
+ new_set[s1 + s2].backref = set2[s2].backref;
+ if (set2[s2].tags == NULL)
+ new_set[s1 + s2].tags = NULL;
+ else
+ {
+ for (i = 0; set2[s2].tags[i] >= 0; i++);
+ new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
+ if (new_tags == NULL)
+ return NULL;
+ for (j = 0; j < i; j++)
+ new_tags[j] = set2[s2].tags[j];
+ new_tags[j] = -1;
+ new_set[s1 + s2].tags = new_tags;
+ }
+ }
+ new_set[s1 + s2].position = -1;
+ return new_set;
+}
+
+/* Finds the empty path through `node' which is the one that should be
+ taken according to POSIX.2 rules, and adds the tags on that path to
+ `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
+ set to the number of tags seen on the path. */
+static reg_errcode_t
+tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
+ int *assertions, int *num_tags_seen)
+{
+ tre_literal_t *lit;
+ tre_union_t *uni;
+ tre_catenation_t *cat;
+ tre_iteration_t *iter;
+ int i;
+ int bottom = tre_stack_num_objects(stack);
+ reg_errcode_t status = REG_OK;
+ if (num_tags_seen)
+ *num_tags_seen = 0;
+
+ status = tre_stack_push_voidptr(stack, node);
+
+ /* Walk through the tree recursively. */
+ while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
+ {
+ node = tre_stack_pop_voidptr(stack);
+
+ switch (node->type)
+ {
+ case LITERAL:
+ lit = (tre_literal_t *)node->obj;
+ switch (lit->code_min)
+ {
+ case TAG:
+ if (lit->code_max >= 0)
+ {
+ if (tags != NULL)
+ {
+ /* Add the tag to `tags'. */
+ for (i = 0; tags[i] >= 0; i++)
+ if (tags[i] == lit->code_max)
+ break;
+ if (tags[i] < 0)
+ {
+ tags[i] = lit->code_max;
+ tags[i + 1] = -1;
+ }
+ }
+ if (num_tags_seen)
+ (*num_tags_seen)++;
+ }
+ break;
+ case ASSERTION:
+ assert(lit->code_max >= 1
+ || lit->code_max <= ASSERT_LAST);
+ if (assertions != NULL)
+ *assertions |= lit->code_max;
+ break;
+ case EMPTY:
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+
+ case UNION:
+ /* Subexpressions starting earlier take priority over ones
+ starting later, so we prefer the left subexpression over the
+ right subexpression. */
+ uni = (tre_union_t *)node->obj;
+ if (uni->left->nullable)
+ STACK_PUSHX(stack, voidptr, uni->left)
+ else if (uni->right->nullable)
+ STACK_PUSHX(stack, voidptr, uni->right)
+ else
+ assert(0);
+ break;
+
+ case CATENATION:
+ /* The path must go through both children. */
+ cat = (tre_catenation_t *)node->obj;
+ assert(cat->left->nullable);
+ assert(cat->right->nullable);
+ STACK_PUSHX(stack, voidptr, cat->left);
+ STACK_PUSHX(stack, voidptr, cat->right);
+ break;
+
+ case ITERATION:
+ /* A match with an empty string is preferred over no match at
+ all, so we go through the argument if possible. */
+ iter = (tre_iteration_t *)node->obj;
+ if (iter->arg->nullable)
+ STACK_PUSHX(stack, voidptr, iter->arg);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ return status;
+}
+
+
+typedef enum {
+ NFL_RECURSE,
+ NFL_POST_UNION,
+ NFL_POST_CATENATION,
+ NFL_POST_ITERATION
+} tre_nfl_stack_symbol_t;
+
+
+/* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
+ the nodes of the AST `tree'. */
+static reg_errcode_t
+tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
+{
+ int bottom = tre_stack_num_objects(stack);
+
+ STACK_PUSHR(stack, voidptr, tree);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+
+ while (tre_stack_num_objects(stack) > bottom)
+ {
+ tre_nfl_stack_symbol_t symbol;
+ tre_ast_node_t *node;
+
+ symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
+ node = tre_stack_pop_voidptr(stack);
+ switch (symbol)
+ {
+ case NFL_RECURSE:
+ switch (node->type)
+ {
+ case LITERAL:
+ {
+ tre_literal_t *lit = (tre_literal_t *)node->obj;
+ if (IS_BACKREF(lit))
+ {
+ /* Back references: nullable = false, firstpos = {i},
+ lastpos = {i}. */
+ node->nullable = 0;
+ node->firstpos = tre_set_one(mem, lit->position, 0,
+ TRE_CHAR_MAX, 0, NULL, -1);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_one(mem, lit->position, 0,
+ TRE_CHAR_MAX, 0, NULL,
+ (int)lit->code_max);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ else if (lit->code_min < 0)
+ {
+ /* Tags, empty strings, params, and zero width assertions:
+ nullable = true, firstpos = {}, and lastpos = {}. */
+ node->nullable = 1;
+ node->firstpos = tre_set_empty(mem);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_empty(mem);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ else
+ {
+ /* Literal at position i: nullable = false, firstpos = {i},
+ lastpos = {i}. */
+ node->nullable = 0;
+ node->firstpos =
+ tre_set_one(mem, lit->position, (int)lit->code_min,
+ (int)lit->code_max, 0, NULL, -1);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_one(mem, lit->position,
+ (int)lit->code_min,
+ (int)lit->code_max,
+ lit->class, lit->neg_classes,
+ -1);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ break;
+ }
+
+ case UNION:
+ /* Compute the attributes for the two subtrees, and after that
+ for this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_UNION);
+ STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+
+ case CATENATION:
+ /* Compute the attributes for the two subtrees, and after that
+ for this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_CATENATION);
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+
+ case ITERATION:
+ /* Compute the attributes for the subtree, and after that for
+ this node. */
+ STACK_PUSHR(stack, voidptr, node);
+ STACK_PUSHR(stack, int, NFL_POST_ITERATION);
+ STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
+ STACK_PUSHR(stack, int, NFL_RECURSE);
+ break;
+ }
+ break; /* end case: NFL_RECURSE */
+
+ case NFL_POST_UNION:
+ {
+ tre_union_t *uni = (tre_union_t *)node->obj;
+ node->nullable = uni->left->nullable || uni->right->nullable;
+ node->firstpos = tre_set_union(mem, uni->left->firstpos,
+ uni->right->firstpos, NULL, 0);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ node->lastpos = tre_set_union(mem, uni->left->lastpos,
+ uni->right->lastpos, NULL, 0);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ break;
+ }
+
+ case NFL_POST_ITERATION:
+ {
+ tre_iteration_t *iter = (tre_iteration_t *)node->obj;
+
+ if (iter->min == 0 || iter->arg->nullable)
+ node->nullable = 1;
+ else
+ node->nullable = 0;
+ node->firstpos = iter->arg->firstpos;
+ node->lastpos = iter->arg->lastpos;
+ break;
+ }
+
+ case NFL_POST_CATENATION:
+ {
+ int num_tags, *tags, assertions;
+ reg_errcode_t status;
+ tre_catenation_t *cat = node->obj;
+ node->nullable = cat->left->nullable && cat->right->nullable;
+
+ /* Compute firstpos. */
+ if (cat->left->nullable)
+ {
+ /* The left side matches the empty string. Make a first pass
+ with tre_match_empty() to get the number of tags and
+ parameters. */
+ status = tre_match_empty(stack, cat->left,
+ NULL, NULL, &num_tags);
+ if (status != REG_OK)
+ return status;
+ /* Allocate arrays for the tags and parameters. */
+ tags = xmalloc(sizeof(*tags) * (num_tags + 1));
+ if (!tags)
+ return REG_ESPACE;
+ tags[0] = -1;
+ assertions = 0;
+ /* Second pass with tre_mach_empty() to get the list of
+ tags and parameters. */
+ status = tre_match_empty(stack, cat->left, tags,
+ &assertions, NULL);
+ if (status != REG_OK)
+ {
+ xfree(tags);
+ return status;
+ }
+ node->firstpos =
+ tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
+ tags, assertions);
+ xfree(tags);
+ if (!node->firstpos)
+ return REG_ESPACE;
+ }
+ else
+ {
+ node->firstpos = cat->left->firstpos;
+ }
+
+ /* Compute lastpos. */
+ if (cat->right->nullable)
+ {
+ /* The right side matches the empty string. Make a first pass
+ with tre_match_empty() to get the number of tags and
+ parameters. */
+ status = tre_match_empty(stack, cat->right,
+ NULL, NULL, &num_tags);
+ if (status != REG_OK)
+ return status;
+ /* Allocate arrays for the tags and parameters. */
+ tags = xmalloc(sizeof(int) * (num_tags + 1));
+ if (!tags)
+ return REG_ESPACE;
+ tags[0] = -1;
+ assertions = 0;
+ /* Second pass with tre_mach_empty() to get the list of
+ tags and parameters. */
+ status = tre_match_empty(stack, cat->right, tags,
+ &assertions, NULL);
+ if (status != REG_OK)
+ {
+ xfree(tags);
+ return status;
+ }
+ node->lastpos =
+ tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
+ tags, assertions);
+ xfree(tags);
+ if (!node->lastpos)
+ return REG_ESPACE;
+ }
+ else
+ {
+ node->lastpos = cat->right->lastpos;
+ }
+ break;
+ }
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ return REG_OK;
+}
+
+
+/* Adds a transition from each position in `p1' to each position in `p2'. */
+static reg_errcode_t
+tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
+ tre_tnfa_transition_t *transitions,
+ int *counts, int *offs)
+{
+ tre_pos_and_tags_t *orig_p2 = p2;
+ tre_tnfa_transition_t *trans;
+ int i, j, k, l, dup, prev_p2_pos;
+
+ if (transitions != NULL)
+ while (p1->position >= 0)
+ {
+ p2 = orig_p2;
+ prev_p2_pos = -1;
+ while (p2->position >= 0)
+ {
+ /* Optimization: if this position was already handled, skip it. */
+ if (p2->position == prev_p2_pos)
+ {
+ p2++;
+ continue;
+ }
+ prev_p2_pos = p2->position;
+ /* Set `trans' to point to the next unused transition from
+ position `p1->position'. */
+ trans = transitions + offs[p1->position];
+ while (trans->state != NULL)
+ {
+#if 0
+ /* If we find a previous transition from `p1->position' to
+ `p2->position', it is overwritten. This can happen only
+ if there are nested loops in the regexp, like in "((a)*)*".
+ In POSIX.2 repetition using the outer loop is always
+ preferred over using the inner loop. Therefore the
+ transition for the inner loop is useless and can be thrown
+ away. */
+ /* XXX - The same position is used for all nodes in a bracket
+ expression, so this optimization cannot be used (it will
+ break bracket expressions) unless I figure out a way to
+ detect it here. */
+ if (trans->state_id == p2->position)
+ {
+ break;
+ }
+#endif
+ trans++;
+ }
+
+ if (trans->state == NULL)
+ (trans + 1)->state = NULL;
+ /* Use the character ranges, assertions, etc. from `p1' for
+ the transition from `p1' to `p2'. */
+ trans->code_min = p1->code_min;
+ trans->code_max = p1->code_max;
+ trans->state = transitions + offs[p2->position];
+ trans->state_id = p2->position;
+ trans->assertions = p1->assertions | p2->assertions
+ | (p1->class ? ASSERT_CHAR_CLASS : 0)
+ | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
+ if (p1->backref >= 0)
+ {
+ assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
+ assert(p2->backref < 0);
+ trans->u.backref = p1->backref;
+ trans->assertions |= ASSERT_BACKREF;
+ }
+ else
+ trans->u.class = p1->class;
+ if (p1->neg_classes != NULL)
+ {
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
+ trans->neg_classes =
+ xmalloc(sizeof(*trans->neg_classes) * (i + 1));
+ if (trans->neg_classes == NULL)
+ return REG_ESPACE;
+ for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
+ trans->neg_classes[i] = p1->neg_classes[i];
+ trans->neg_classes[i] = (tre_ctype_t)0;
+ }
+ else
+ trans->neg_classes = NULL;
+
+ /* Find out how many tags this transition has. */
+ i = 0;
+ if (p1->tags != NULL)
+ while(p1->tags[i] >= 0)
+ i++;
+ j = 0;
+ if (p2->tags != NULL)
+ while(p2->tags[j] >= 0)
+ j++;
+
+ /* If we are overwriting a transition, free the old tag array. */
+ if (trans->tags != NULL)
+ xfree(trans->tags);
+ trans->tags = NULL;
+
+ /* If there were any tags, allocate an array and fill it. */
+ if (i + j > 0)
+ {
+ trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
+ if (!trans->tags)
+ return REG_ESPACE;
+ i = 0;
+ if (p1->tags != NULL)
+ while(p1->tags[i] >= 0)
+ {
+ trans->tags[i] = p1->tags[i];
+ i++;
+ }
+ l = i;
+ j = 0;
+ if (p2->tags != NULL)
+ while (p2->tags[j] >= 0)
+ {
+ /* Don't add duplicates. */
+ dup = 0;
+ for (k = 0; k < i; k++)
+ if (trans->tags[k] == p2->tags[j])
+ {
+ dup = 1;
+ break;
+ }
+ if (!dup)
+ trans->tags[l++] = p2->tags[j];
+ j++;
+ }
+ trans->tags[l] = -1;
+ }
+
+ p2++;
+ }
+ p1++;
+ }
+ else
+ /* Compute a maximum limit for the number of transitions leaving
+ from each state. */
+ while (p1->position >= 0)
+ {
+ p2 = orig_p2;
+ while (p2->position >= 0)
+ {
+ counts[p1->position]++;
+ p2++;
+ }
+ p1++;
+ }
+ return REG_OK;
+}
+
+/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
+ labelled with one character range (there are no transitions on empty
+ strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
+ the regexp. */
+static reg_errcode_t
+tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
+ int *counts, int *offs)
+{
+ tre_union_t *uni;
+ tre_catenation_t *cat;
+ tre_iteration_t *iter;
+ reg_errcode_t errcode = REG_OK;
+
+ /* XXX - recurse using a stack!. */
+ switch (node->type)
+ {
+ case LITERAL:
+ break;
+ case UNION:
+ uni = (tre_union_t *)node->obj;
+ errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
+ break;
+
+ case CATENATION:
+ cat = (tre_catenation_t *)node->obj;
+ /* Add a transition from each position in cat->left->lastpos
+ to each position in cat->right->firstpos. */
+ errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
+ transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
+ break;
+
+ case ITERATION:
+ iter = (tre_iteration_t *)node->obj;
+ assert(iter->max == -1 || iter->max == 1);
+
+ if (iter->max == -1)
+ {
+ assert(iter->min == 0 || iter->min == 1);
+ /* Add a transition from each last position in the iterated
+ expression to each first position. */
+ errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
+ transitions, counts, offs);
+ if (errcode != REG_OK)
+ return errcode;
+ }
+ errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
+ break;
+ }
+ return errcode;
+}
+
+
+#define ERROR_EXIT(err) \
+ do \
+ { \
+ errcode = err; \
+ if (/*CONSTCOND*/1) \
+ goto error_exit; \
+ } \
+ while (/*CONSTCOND*/0)
+
+
+int
+regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
+{
+ tre_stack_t *stack;
+ tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
+ tre_pos_and_tags_t *p;
+ int *counts = NULL, *offs = NULL;
+ int i, add = 0;
+ tre_tnfa_transition_t *transitions, *initial;
+ tre_tnfa_t *tnfa = NULL;
+ tre_submatch_data_t *submatch_data;
+ tre_tag_direction_t *tag_directions = NULL;
+ reg_errcode_t errcode;
+ tre_mem_t mem;
+
+ /* Parse context. */
+ tre_parse_ctx_t parse_ctx;
+
+ /* Allocate a stack used throughout the compilation process for various
+ purposes. */
+ stack = tre_stack_new(512, 1024000, 128);
+ if (!stack)
+ return REG_ESPACE;
+ /* Allocate a fast memory allocator. */
+ mem = tre_mem_new();
+ if (!mem)
+ {
+ tre_stack_destroy(stack);
+ return REG_ESPACE;
+ }
+
+ /* Parse the regexp. */
+ memset(&parse_ctx, 0, sizeof(parse_ctx));
+ parse_ctx.mem = mem;
+ parse_ctx.stack = stack;
+ parse_ctx.start = regex;
+ parse_ctx.cflags = cflags;
+ parse_ctx.max_backref = -1;
+ errcode = tre_parse(&parse_ctx);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+ preg->re_nsub = parse_ctx.submatch_id - 1;
+ tree = parse_ctx.n;
+
+#ifdef TRE_DEBUG
+ tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+ /* Referring to nonexistent subexpressions is illegal. */
+ if (parse_ctx.max_backref > (int)preg->re_nsub)
+ ERROR_EXIT(REG_ESUBREG);
+
+ /* Allocate the TNFA struct. */
+ tnfa = xcalloc(1, sizeof(tre_tnfa_t));
+ if (tnfa == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->have_backrefs = parse_ctx.max_backref >= 0;
+ tnfa->have_approx = 0;
+ tnfa->num_submatches = parse_ctx.submatch_id;
+
+ /* Set up tags for submatch addressing. If REG_NOSUB is set and the
+ regexp does not have back references, this can be skipped. */
+ if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
+ {
+
+ /* Figure out how many tags we will need. */
+ errcode = tre_add_tags(NULL, stack, tree, tnfa);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ if (tnfa->num_tags > 0)
+ {
+ tag_directions = xmalloc(sizeof(*tag_directions)
+ * (tnfa->num_tags + 1));
+ if (tag_directions == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->tag_directions = tag_directions;
+ memset(tag_directions, -1,
+ sizeof(*tag_directions) * (tnfa->num_tags + 1));
+ }
+ tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
+ sizeof(*tnfa->minimal_tags));
+ if (tnfa->minimal_tags == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
+ sizeof(*submatch_data));
+ if (submatch_data == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->submatch_data = submatch_data;
+
+ errcode = tre_add_tags(mem, stack, tree, tnfa);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ }
+
+ /* Expand iteration nodes. */
+ errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
+ tag_directions);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ /* Add a dummy node for the final state.
+ XXX - For certain patterns this dummy node can be optimized away,
+ for example "a*" or "ab*". Figure out a simple way to detect
+ this possibility. */
+ tmp_ast_l = tree;
+ tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
+ if (tmp_ast_r == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
+ if (tree == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ errcode = tre_compute_nfl(mem, stack, tree);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ counts = xmalloc(sizeof(int) * parse_ctx.position);
+ if (counts == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ offs = xmalloc(sizeof(int) * parse_ctx.position);
+ if (offs == NULL)
+ ERROR_EXIT(REG_ESPACE);
+
+ for (i = 0; i < parse_ctx.position; i++)
+ counts[i] = 0;
+ tre_ast_to_tnfa(tree, NULL, counts, NULL);
+
+ add = 0;
+ for (i = 0; i < parse_ctx.position; i++)
+ {
+ offs[i] = add;
+ add += counts[i] + 1;
+ counts[i] = 0;
+ }
+ transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
+ if (transitions == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->transitions = transitions;
+ tnfa->num_transitions = add;
+
+ errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
+ if (errcode != REG_OK)
+ ERROR_EXIT(errcode);
+
+ tnfa->firstpos_chars = NULL;
+
+ p = tree->firstpos;
+ i = 0;
+ while (p->position >= 0)
+ {
+ i++;
+ p++;
+ }
+
+ initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
+ if (initial == NULL)
+ ERROR_EXIT(REG_ESPACE);
+ tnfa->initial = initial;
+
+ i = 0;
+ for (p = tree->firstpos; p->position >= 0; p++)
+ {
+ initial[i].state = transitions + offs[p->position];
+ initial[i].state_id = p->position;
+ initial[i].tags = NULL;
+ /* Copy the arrays p->tags, and p->params, they are allocated
+ from a tre_mem object. */
+ if (p->tags)
+ {
+ int j;
+ for (j = 0; p->tags[j] >= 0; j++);
+ initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
+ if (!initial[i].tags)
+ ERROR_EXIT(REG_ESPACE);
+ memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
+ }
+ initial[i].assertions = p->assertions;
+ i++;
+ }
+ initial[i].state = NULL;
+
+ tnfa->num_transitions = add;
+ tnfa->final = transitions + offs[tree->lastpos[0].position];
+ tnfa->num_states = parse_ctx.position;
+ tnfa->cflags = cflags;
+
+ tre_mem_destroy(mem);
+ tre_stack_destroy(stack);
+ xfree(counts);
+ xfree(offs);
+
+ preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+ return REG_OK;
+
+ error_exit:
+ /* Free everything that was allocated and return the error code. */
+ tre_mem_destroy(mem);
+ if (stack != NULL)
+ tre_stack_destroy(stack);
+ if (counts != NULL)
+ xfree(counts);
+ if (offs != NULL)
+ xfree(offs);
+ preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+ regfree(preg);
+ return errcode;
+}
+
+
+
+
+void
+regfree(regex_t *preg)
+{
+ tre_tnfa_t *tnfa;
+ unsigned int i;
+ tre_tnfa_transition_t *trans;
+
+ tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ if (!tnfa)
+ return;
+
+ for (i = 0; i < tnfa->num_transitions; i++)
+ if (tnfa->transitions[i].state)
+ {
+ if (tnfa->transitions[i].tags)
+ xfree(tnfa->transitions[i].tags);
+ if (tnfa->transitions[i].neg_classes)
+ xfree(tnfa->transitions[i].neg_classes);
+ }
+ if (tnfa->transitions)
+ xfree(tnfa->transitions);
+
+ if (tnfa->initial)
+ {
+ for (trans = tnfa->initial; trans->state; trans++)
+ {
+ if (trans->tags)
+ xfree(trans->tags);
+ }
+ xfree(tnfa->initial);
+ }
+
+ if (tnfa->submatch_data)
+ {
+ for (i = 0; i < tnfa->num_submatches; i++)
+ if (tnfa->submatch_data[i].parents)
+ xfree(tnfa->submatch_data[i].parents);
+ xfree(tnfa->submatch_data);
+ }
+
+ if (tnfa->tag_directions)
+ xfree(tnfa->tag_directions);
+ if (tnfa->firstpos_chars)
+ xfree(tnfa->firstpos_chars);
+ if (tnfa->minimal_tags)
+ xfree(tnfa->minimal_tags);
+ xfree(tnfa);
+}
diff --git a/libc-top-half/musl/src/regex/regerror.c b/libc-top-half/musl/src/regex/regerror.c
new file mode 100644
index 0000000..5b347cc
--- /dev/null
+++ b/libc-top-half/musl/src/regex/regerror.c
@@ -0,0 +1,37 @@
+#include <string.h>
+#include <regex.h>
+#include <stdio.h>
+#include "locale_impl.h"
+
+/* Error message strings for error codes listed in `regex.h'. This list
+ needs to be in sync with the codes listed there, naturally. */
+
+/* Converted to single string by Rich Felker to remove the need for
+ * data relocations at runtime, 27 Feb 2006. */
+
+static const char messages[] = {
+ "No error\0"
+ "No match\0"
+ "Invalid regexp\0"
+ "Unknown collating element\0"
+ "Unknown character class name\0"
+ "Trailing backslash\0"
+ "Invalid back reference\0"
+ "Missing ']'\0"
+ "Missing ')'\0"
+ "Missing '}'\0"
+ "Invalid contents of {}\0"
+ "Invalid character range\0"
+ "Out of memory\0"
+ "Repetition not preceded by valid expression\0"
+ "\0Unknown error"
+};
+
+size_t regerror(int e, const regex_t *restrict preg, char *restrict buf, size_t size)
+{
+ const char *s;
+ for (s=messages; e && *s; e--, s+=strlen(s)+1);
+ if (!*s) s++;
+ s = LCTRANS_CUR(s);
+ return 1+snprintf(buf, size, "%s", s);
+}
diff --git a/libc-top-half/musl/src/regex/regexec.c b/libc-top-half/musl/src/regex/regexec.c
new file mode 100644
index 0000000..253b0e1
--- /dev/null
+++ b/libc-top-half/musl/src/regex/regexec.c
@@ -0,0 +1,1028 @@
+/*
+ regexec.c - TRE POSIX compatible matching functions (and more).
+
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include <regex.h>
+
+#include "tre.h"
+
+#include <assert.h>
+
+static void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
+
+/***********************************************************************
+ from tre-match-utils.h
+***********************************************************************/
+
+#define GET_NEXT_WCHAR() do { \
+ prev_c = next_c; pos += pos_add_next; \
+ if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \
+ if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \
+ else pos_add_next++; \
+ } \
+ str_byte += pos_add_next; \
+ } while (0)
+
+#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c))
+
+#define CHECK_ASSERTIONS(assertions) \
+ (((assertions & ASSERT_AT_BOL) \
+ && (pos > 0 || reg_notbol) \
+ && (prev_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_EOL) \
+ && (next_c != L'\0' || reg_noteol) \
+ && (next_c != L'\n' || !reg_newline)) \
+ || ((assertions & ASSERT_AT_BOW) \
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_EOW) \
+ && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB) \
+ && (pos != 0 && next_c != L'\0' \
+ && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
+ || ((assertions & ASSERT_AT_WB_NEG) \
+ && (pos == 0 || next_c == L'\0' \
+ || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
+
+#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
+ (((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && !(tnfa->cflags & REG_ICASE) \
+ && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS) \
+ && (tnfa->cflags & REG_ICASE) \
+ && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \
+ && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
+ || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
+ && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
+ tnfa->cflags & REG_ICASE)))
+
+
+
+
+/* Returns 1 if `t1' wins `t2', 0 otherwise. */
+static int
+tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
+ regoff_t *t1, regoff_t *t2)
+{
+ int i;
+ for (i = 0; i < num_tags; i++)
+ {
+ if (tag_directions[i] == TRE_TAG_MINIMIZE)
+ {
+ if (t1[i] < t2[i])
+ return 1;
+ if (t1[i] > t2[i])
+ return 0;
+ }
+ else
+ {
+ if (t1[i] > t2[i])
+ return 1;
+ if (t1[i] < t2[i])
+ return 0;
+ }
+ }
+ /* assert(0);*/
+ return 0;
+}
+
+static int
+tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
+{
+ while (*classes != (tre_ctype_t)0)
+ if ((!icase && tre_isctype(wc, *classes))
+ || (icase && (tre_isctype(tre_toupper(wc), *classes)
+ || tre_isctype(tre_tolower(wc), *classes))))
+ return 1; /* Match. */
+ else
+ classes++;
+ return 0; /* No match. */
+}
+
+
+/***********************************************************************
+ from tre-match-parallel.c
+***********************************************************************/
+
+/*
+ This algorithm searches for matches basically by reading characters
+ in the searched string one by one, starting at the beginning. All
+ matching paths in the TNFA are traversed in parallel. When two or
+ more paths reach the same state, exactly one is chosen according to
+ tag ordering rules; if returning submatches is not required it does
+ not matter which path is chosen.
+
+ The worst case time required for finding the leftmost and longest
+ match, or determining that there is no match, is always linearly
+ dependent on the length of the text being searched.
+
+ This algorithm cannot handle TNFAs with back referencing nodes.
+ See `tre-match-backtrack.c'.
+*/
+
+typedef struct {
+ tre_tnfa_transition_t *state;
+ regoff_t *tags;
+} tre_tnfa_reach_t;
+
+typedef struct {
+ regoff_t pos;
+ regoff_t **tags;
+} tre_reach_pos_t;
+
+
+static reg_errcode_t
+tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
+ regoff_t *match_tags, int eflags,
+ regoff_t *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ regoff_t pos = -1;
+ regoff_t pos_add_next = 1;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+ reg_errcode_t ret;
+
+ char *buf;
+ tre_tnfa_transition_t *trans_i;
+ tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
+ tre_reach_pos_t *reach_pos;
+ int *tag_i;
+ int num_tags, i;
+
+ regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */
+ int new_match = 0;
+ regoff_t *tmp_tags = NULL;
+ regoff_t *tmp_iptr;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ if (!match_tags)
+ num_tags = 0;
+ else
+ num_tags = tnfa->num_tags;
+
+ /* Allocate memory for temporary data required for matching. This needs to
+ be done for every matching operation to be thread safe. This allocates
+ everything in a single large block with calloc(). */
+ {
+ size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
+ char *tmp_buf;
+
+ /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
+ * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
+ if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
+ return REG_ESPACE;
+
+ /* Likewise check rbytes. */
+ if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
+ return REG_ESPACE;
+
+ /* Likewise check pbytes. */
+ if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
+ return REG_ESPACE;
+
+ /* Compute the length of the block we need. */
+ tbytes = sizeof(*tmp_tags) * num_tags;
+ rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
+ pbytes = sizeof(*reach_pos) * tnfa->num_states;
+ xbytes = sizeof(regoff_t) * num_tags;
+ total_bytes =
+ (sizeof(long) - 1) * 4 /* for alignment paddings */
+ + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
+
+ /* Allocate the memory. */
+ buf = calloc(total_bytes, 1);
+ if (buf == NULL)
+ return REG_ESPACE;
+
+ /* Get the various pointers within tmp_buf (properly aligned). */
+ tmp_tags = (void *)buf;
+ tmp_buf = buf + tbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach_next = (void *)tmp_buf;
+ tmp_buf += rbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach = (void *)tmp_buf;
+ tmp_buf += rbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ reach_pos = (void *)tmp_buf;
+ tmp_buf += pbytes;
+ tmp_buf += ALIGN(tmp_buf, long);
+ for (i = 0; i < tnfa->num_states; i++)
+ {
+ reach[i].tags = (void *)tmp_buf;
+ tmp_buf += xbytes;
+ reach_next[i].tags = (void *)tmp_buf;
+ tmp_buf += xbytes;
+ }
+ }
+
+ for (i = 0; i < tnfa->num_states; i++)
+ reach_pos[i].pos = -1;
+
+ GET_NEXT_WCHAR();
+ pos = 0;
+
+ reach_next_i = reach_next;
+ while (1)
+ {
+ /* If no match found yet, add the initial states to `reach_next'. */
+ if (match_eo < 0)
+ {
+ trans_i = tnfa->initial;
+ while (trans_i->state != NULL)
+ {
+ if (reach_pos[trans_i->state_id].pos < pos)
+ {
+ if (trans_i->assertions
+ && CHECK_ASSERTIONS(trans_i->assertions))
+ {
+ trans_i++;
+ continue;
+ }
+
+ reach_next_i->state = trans_i->state;
+ for (i = 0; i < num_tags; i++)
+ reach_next_i->tags[i] = -1;
+ tag_i = trans_i->tags;
+ if (tag_i)
+ while (*tag_i >= 0)
+ {
+ if (*tag_i < num_tags)
+ reach_next_i->tags[*tag_i] = pos;
+ tag_i++;
+ }
+ if (reach_next_i->state == tnfa->final)
+ {
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next_i->tags[i];
+ }
+ reach_pos[trans_i->state_id].pos = pos;
+ reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+ reach_next_i++;
+ }
+ trans_i++;
+ }
+ reach_next_i->state = NULL;
+ }
+ else
+ {
+ if (num_tags == 0 || reach_next_i == reach_next)
+ /* We have found a match. */
+ break;
+ }
+
+ /* Check for end of string. */
+ if (!next_c) break;
+
+ GET_NEXT_WCHAR();
+
+ /* Swap `reach' and `reach_next'. */
+ reach_i = reach;
+ reach = reach_next;
+ reach_next = reach_i;
+
+ /* For each state in `reach', weed out states that don't fulfill the
+ minimal matching conditions. */
+ if (tnfa->num_minimals && new_match)
+ {
+ new_match = 0;
+ reach_next_i = reach_next;
+ for (reach_i = reach; reach_i->state; reach_i++)
+ {
+ int skip = 0;
+ for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
+ {
+ int end = tnfa->minimal_tags[i];
+ int start = tnfa->minimal_tags[i + 1];
+ if (end >= num_tags)
+ {
+ skip = 1;
+ break;
+ }
+ else if (reach_i->tags[start] == match_tags[start]
+ && reach_i->tags[end] < match_tags[end])
+ {
+ skip = 1;
+ break;
+ }
+ }
+ if (!skip)
+ {
+ reach_next_i->state = reach_i->state;
+ tmp_iptr = reach_next_i->tags;
+ reach_next_i->tags = reach_i->tags;
+ reach_i->tags = tmp_iptr;
+ reach_next_i++;
+ }
+ }
+ reach_next_i->state = NULL;
+
+ /* Swap `reach' and `reach_next'. */
+ reach_i = reach;
+ reach = reach_next;
+ reach_next = reach_i;
+ }
+
+ /* For each state in `reach' see if there is a transition leaving with
+ the current input symbol to a state not yet in `reach_next', and
+ add the destination states to `reach_next'. */
+ reach_next_i = reach_next;
+ for (reach_i = reach; reach_i->state; reach_i++)
+ {
+ for (trans_i = reach_i->state; trans_i->state; trans_i++)
+ {
+ /* Does this transition match the input symbol? */
+ if (trans_i->code_min <= (tre_cint_t)prev_c &&
+ trans_i->code_max >= (tre_cint_t)prev_c)
+ {
+ if (trans_i->assertions
+ && (CHECK_ASSERTIONS(trans_i->assertions)
+ || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+ {
+ continue;
+ }
+
+ /* Compute the tags after this transition. */
+ for (i = 0; i < num_tags; i++)
+ tmp_tags[i] = reach_i->tags[i];
+ tag_i = trans_i->tags;
+ if (tag_i != NULL)
+ while (*tag_i >= 0)
+ {
+ if (*tag_i < num_tags)
+ tmp_tags[*tag_i] = pos;
+ tag_i++;
+ }
+
+ if (reach_pos[trans_i->state_id].pos < pos)
+ {
+ /* Found an unvisited node. */
+ reach_next_i->state = trans_i->state;
+ tmp_iptr = reach_next_i->tags;
+ reach_next_i->tags = tmp_tags;
+ tmp_tags = tmp_iptr;
+ reach_pos[trans_i->state_id].pos = pos;
+ reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
+
+ if (reach_next_i->state == tnfa->final
+ && (match_eo == -1
+ || (num_tags > 0
+ && reach_next_i->tags[0] <= match_tags[0])))
+ {
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = reach_next_i->tags[i];
+ }
+ reach_next_i++;
+
+ }
+ else
+ {
+ assert(reach_pos[trans_i->state_id].pos == pos);
+ /* Another path has also reached this state. We choose
+ the winner by examining the tag values for both
+ paths. */
+ if (tre_tag_order(num_tags, tnfa->tag_directions,
+ tmp_tags,
+ *reach_pos[trans_i->state_id].tags))
+ {
+ /* The new path wins. */
+ tmp_iptr = *reach_pos[trans_i->state_id].tags;
+ *reach_pos[trans_i->state_id].tags = tmp_tags;
+ if (trans_i->state == tnfa->final)
+ {
+ match_eo = pos;
+ new_match = 1;
+ for (i = 0; i < num_tags; i++)
+ match_tags[i] = tmp_tags[i];
+ }
+ tmp_tags = tmp_iptr;
+ }
+ }
+ }
+ }
+ }
+ reach_next_i->state = NULL;
+ }
+
+ *match_end_ofs = match_eo;
+ ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
+error_exit:
+ xfree(buf);
+ return ret;
+}
+
+
+
+/***********************************************************************
+ from tre-match-backtrack.c
+***********************************************************************/
+
+/*
+ This matcher is for regexps that use back referencing. Regexp matching
+ with back referencing is an NP-complete problem on the number of back
+ references. The easiest way to match them is to use a backtracking
+ routine which basically goes through all possible paths in the TNFA
+ and chooses the one which results in the best (leftmost and longest)
+ match. This can be spectacularly expensive and may run out of stack
+ space, but there really is no better known generic algorithm. Quoting
+ Henry Spencer from comp.compilers:
+ <URL: http://compilers.iecc.com/comparch/article/93-03-102>
+
+ POSIX.2 REs require longest match, which is really exciting to
+ implement since the obsolete ("basic") variant also includes
+ \<digit>. I haven't found a better way of tackling this than doing
+ a preliminary match using a DFA (or simulation) on a modified RE
+ that just replicates subREs for \<digit>, and then doing a
+ backtracking match to determine whether the subRE matches were
+ right. This can be rather slow, but I console myself with the
+ thought that people who use \<digit> deserve very slow execution.
+ (Pun unintentional but very appropriate.)
+
+*/
+
+typedef struct {
+ regoff_t pos;
+ const char *str_byte;
+ tre_tnfa_transition_t *state;
+ int state_id;
+ int next_c;
+ regoff_t *tags;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+} tre_backtrack_item_t;
+
+typedef struct tre_backtrack_struct {
+ tre_backtrack_item_t item;
+ struct tre_backtrack_struct *prev;
+ struct tre_backtrack_struct *next;
+} *tre_backtrack_t;
+
+#ifdef TRE_MBSTATE
+#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
+#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
+#else /* !TRE_MBSTATE */
+#define BT_STACK_MBSTATE_IN
+#define BT_STACK_MBSTATE_OUT
+#endif /* !TRE_MBSTATE */
+
+#define tre_bt_mem_new tre_mem_new
+#define tre_bt_mem_alloc tre_mem_alloc
+#define tre_bt_mem_destroy tre_mem_destroy
+
+
+#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
+ do \
+ { \
+ int i; \
+ if (!stack->next) \
+ { \
+ tre_backtrack_t s; \
+ s = tre_bt_mem_alloc(mem, sizeof(*s)); \
+ if (!s) \
+ { \
+ tre_bt_mem_destroy(mem); \
+ if (tags) \
+ xfree(tags); \
+ if (pmatch) \
+ xfree(pmatch); \
+ if (states_seen) \
+ xfree(states_seen); \
+ return REG_ESPACE; \
+ } \
+ s->prev = stack; \
+ s->next = NULL; \
+ s->item.tags = tre_bt_mem_alloc(mem, \
+ sizeof(*tags) * tnfa->num_tags); \
+ if (!s->item.tags) \
+ { \
+ tre_bt_mem_destroy(mem); \
+ if (tags) \
+ xfree(tags); \
+ if (pmatch) \
+ xfree(pmatch); \
+ if (states_seen) \
+ xfree(states_seen); \
+ return REG_ESPACE; \
+ } \
+ stack->next = s; \
+ stack = s; \
+ } \
+ else \
+ stack = stack->next; \
+ stack->item.pos = (_pos); \
+ stack->item.str_byte = (_str_byte); \
+ stack->item.state = (_state); \
+ stack->item.state_id = (_state_id); \
+ stack->item.next_c = (_next_c); \
+ for (i = 0; i < tnfa->num_tags; i++) \
+ stack->item.tags[i] = (_tags)[i]; \
+ BT_STACK_MBSTATE_IN; \
+ } \
+ while (0)
+
+#define BT_STACK_POP() \
+ do \
+ { \
+ int i; \
+ assert(stack->prev); \
+ pos = stack->item.pos; \
+ str_byte = stack->item.str_byte; \
+ state = stack->item.state; \
+ next_c = stack->item.next_c; \
+ for (i = 0; i < tnfa->num_tags; i++) \
+ tags[i] = stack->item.tags[i]; \
+ BT_STACK_MBSTATE_OUT; \
+ stack = stack->prev; \
+ } \
+ while (0)
+
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+static reg_errcode_t
+tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
+ regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
+{
+ /* State variables required by GET_NEXT_WCHAR. */
+ tre_char_t prev_c = 0, next_c = 0;
+ const char *str_byte = string;
+ regoff_t pos = 0;
+ regoff_t pos_add_next = 1;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate;
+#endif /* TRE_MBSTATE */
+ int reg_notbol = eflags & REG_NOTBOL;
+ int reg_noteol = eflags & REG_NOTEOL;
+ int reg_newline = tnfa->cflags & REG_NEWLINE;
+
+ /* These are used to remember the necessary values of the above
+ variables to return to the position where the current search
+ started from. */
+ int next_c_start;
+ const char *str_byte_start;
+ regoff_t pos_start = -1;
+#ifdef TRE_MBSTATE
+ mbstate_t mbstate_start;
+#endif /* TRE_MBSTATE */
+
+ /* End offset of best match so far, or -1 if no match found yet. */
+ regoff_t match_eo = -1;
+ /* Tag arrays. */
+ int *next_tags;
+ regoff_t *tags = NULL;
+ /* Current TNFA state. */
+ tre_tnfa_transition_t *state;
+ int *states_seen = NULL;
+
+ /* Memory allocator to for allocating the backtracking stack. */
+ tre_mem_t mem = tre_bt_mem_new();
+
+ /* The backtracking stack. */
+ tre_backtrack_t stack;
+
+ tre_tnfa_transition_t *trans_i;
+ regmatch_t *pmatch = NULL;
+ int ret;
+
+#ifdef TRE_MBSTATE
+ memset(&mbstate, '\0', sizeof(mbstate));
+#endif /* TRE_MBSTATE */
+
+ if (!mem)
+ return REG_ESPACE;
+ stack = tre_bt_mem_alloc(mem, sizeof(*stack));
+ if (!stack)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ stack->prev = NULL;
+ stack->next = NULL;
+
+ if (tnfa->num_tags)
+ {
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+ if (!tags)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+ if (tnfa->num_submatches)
+ {
+ pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
+ if (!pmatch)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+ if (tnfa->num_states)
+ {
+ states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
+ if (!states_seen)
+ {
+ ret = REG_ESPACE;
+ goto error_exit;
+ }
+ }
+
+ retry:
+ {
+ int i;
+ for (i = 0; i < tnfa->num_tags; i++)
+ {
+ tags[i] = -1;
+ if (match_tags)
+ match_tags[i] = -1;
+ }
+ for (i = 0; i < tnfa->num_states; i++)
+ states_seen[i] = 0;
+ }
+
+ state = NULL;
+ pos = pos_start;
+ GET_NEXT_WCHAR();
+ pos_start = pos;
+ next_c_start = next_c;
+ str_byte_start = str_byte;
+#ifdef TRE_MBSTATE
+ mbstate_start = mbstate;
+#endif /* TRE_MBSTATE */
+
+ /* Handle initial states. */
+ next_tags = NULL;
+ for (trans_i = tnfa->initial; trans_i->state; trans_i++)
+ {
+ if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
+ {
+ continue;
+ }
+ if (state == NULL)
+ {
+ /* Start from this state. */
+ state = trans_i->state;
+ next_tags = trans_i->tags;
+ }
+ else
+ {
+ /* Backtrack to this state. */
+ BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
+ trans_i->state_id, next_c, tags, mbstate);
+ {
+ int *tmp = trans_i->tags;
+ if (tmp)
+ while (*tmp >= 0)
+ stack->item.tags[*tmp++] = pos;
+ }
+ }
+ }
+
+ if (next_tags)
+ for (; *next_tags >= 0; next_tags++)
+ tags[*next_tags] = pos;
+
+
+ if (state == NULL)
+ goto backtrack;
+
+ while (1)
+ {
+ tre_tnfa_transition_t *next_state;
+ int empty_br_match;
+
+ if (state == tnfa->final)
+ {
+ if (match_eo < pos
+ || (match_eo == pos
+ && match_tags
+ && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
+ tags, match_tags)))
+ {
+ int i;
+ /* This match wins the previous match. */
+ match_eo = pos;
+ if (match_tags)
+ for (i = 0; i < tnfa->num_tags; i++)
+ match_tags[i] = tags[i];
+ }
+ /* Our TNFAs never have transitions leaving from the final state,
+ so we jump right to backtracking. */
+ goto backtrack;
+ }
+
+ /* Go to the next character in the input string. */
+ empty_br_match = 0;
+ trans_i = state;
+ if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
+ {
+ /* This is a back reference state. All transitions leaving from
+ this state have the same back reference "assertion". Instead
+ of reading the next character, we match the back reference. */
+ regoff_t so, eo;
+ int bt = trans_i->u.backref;
+ regoff_t bt_len;
+ int result;
+
+ /* Get the substring we need to match against. Remember to
+ turn off REG_NOSUB temporarily. */
+ tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
+ tnfa, tags, pos);
+ so = pmatch[bt].rm_so;
+ eo = pmatch[bt].rm_eo;
+ bt_len = eo - so;
+
+ result = strncmp((const char*)string + so, str_byte - 1,
+ (size_t)bt_len);
+
+ if (result == 0)
+ {
+ /* Back reference matched. Check for infinite loop. */
+ if (bt_len == 0)
+ empty_br_match = 1;
+ if (empty_br_match && states_seen[trans_i->state_id])
+ {
+ goto backtrack;
+ }
+
+ states_seen[trans_i->state_id] = empty_br_match;
+
+ /* Advance in input string and resync `prev_c', `next_c'
+ and pos. */
+ str_byte += bt_len - 1;
+ pos += bt_len - 1;
+ GET_NEXT_WCHAR();
+ }
+ else
+ {
+ goto backtrack;
+ }
+ }
+ else
+ {
+ /* Check for end of string. */
+ if (next_c == L'\0')
+ goto backtrack;
+
+ /* Read the next character. */
+ GET_NEXT_WCHAR();
+ }
+
+ next_state = NULL;
+ for (trans_i = state; trans_i->state; trans_i++)
+ {
+ if (trans_i->code_min <= (tre_cint_t)prev_c
+ && trans_i->code_max >= (tre_cint_t)prev_c)
+ {
+ if (trans_i->assertions
+ && (CHECK_ASSERTIONS(trans_i->assertions)
+ || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
+ {
+ continue;
+ }
+
+ if (next_state == NULL)
+ {
+ /* First matching transition. */
+ next_state = trans_i->state;
+ next_tags = trans_i->tags;
+ }
+ else
+ {
+ /* Second matching transition. We may need to backtrack here
+ to take this transition instead of the first one, so we
+ push this transition in the backtracking stack so we can
+ jump back here if needed. */
+ BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
+ trans_i->state_id, next_c, tags, mbstate);
+ {
+ int *tmp;
+ for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
+ stack->item.tags[*tmp] = pos;
+ }
+#if 0 /* XXX - it's important not to look at all transitions here to keep
+ the stack small! */
+ break;
+#endif
+ }
+ }
+ }
+
+ if (next_state != NULL)
+ {
+ /* Matching transitions were found. Take the first one. */
+ state = next_state;
+
+ /* Update the tag values. */
+ if (next_tags)
+ while (*next_tags >= 0)
+ tags[*next_tags++] = pos;
+ }
+ else
+ {
+ backtrack:
+ /* A matching transition was not found. Try to backtrack. */
+ if (stack->prev)
+ {
+ if (stack->item.state->assertions & ASSERT_BACKREF)
+ {
+ states_seen[stack->item.state_id] = 0;
+ }
+
+ BT_STACK_POP();
+ }
+ else if (match_eo < 0)
+ {
+ /* Try starting from a later position in the input string. */
+ /* Check for end of string. */
+ if (next_c == L'\0')
+ {
+ break;
+ }
+ next_c = next_c_start;
+#ifdef TRE_MBSTATE
+ mbstate = mbstate_start;
+#endif /* TRE_MBSTATE */
+ str_byte = str_byte_start;
+ goto retry;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
+ *match_end_ofs = match_eo;
+
+ error_exit:
+ tre_bt_mem_destroy(mem);
+#ifndef TRE_USE_ALLOCA
+ if (tags)
+ xfree(tags);
+ if (pmatch)
+ xfree(pmatch);
+ if (states_seen)
+ xfree(states_seen);
+#endif /* !TRE_USE_ALLOCA */
+
+ return ret;
+}
+
+/***********************************************************************
+ from regexec.c
+***********************************************************************/
+
+/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
+ endpoint values. */
+static void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+ const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
+{
+ tre_submatch_data_t *submatch_data;
+ unsigned int i, j;
+ int *parents;
+
+ i = 0;
+ if (match_eo >= 0 && !(cflags & REG_NOSUB))
+ {
+ /* Construct submatch offsets from the tags. */
+ submatch_data = tnfa->submatch_data;
+ while (i < tnfa->num_submatches && i < nmatch)
+ {
+ if (submatch_data[i].so_tag == tnfa->end_tag)
+ pmatch[i].rm_so = match_eo;
+ else
+ pmatch[i].rm_so = tags[submatch_data[i].so_tag];
+
+ if (submatch_data[i].eo_tag == tnfa->end_tag)
+ pmatch[i].rm_eo = match_eo;
+ else
+ pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
+
+ /* If either of the endpoints were not used, this submatch
+ was not part of the match. */
+ if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
+ pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+
+ i++;
+ }
+ /* Reset all submatches that are not within all of their parent
+ submatches. */
+ i = 0;
+ while (i < tnfa->num_submatches && i < nmatch)
+ {
+ if (pmatch[i].rm_eo == -1)
+ assert(pmatch[i].rm_so == -1);
+ assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
+
+ parents = submatch_data[i].parents;
+ if (parents != NULL)
+ for (j = 0; parents[j] >= 0; j++)
+ {
+ if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
+ || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
+ pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+ }
+ i++;
+ }
+ }
+
+ while (i < nmatch)
+ {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ i++;
+ }
+}
+
+
+/*
+ Wrapper functions for POSIX compatible regexp matching.
+*/
+
+int
+regexec(const regex_t *restrict preg, const char *restrict string,
+ size_t nmatch, regmatch_t pmatch[restrict], int eflags)
+{
+ tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
+ reg_errcode_t status;
+ regoff_t *tags = NULL, eo;
+ if (tnfa->cflags & REG_NOSUB) nmatch = 0;
+ if (tnfa->num_tags > 0 && nmatch > 0)
+ {
+ tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
+ if (tags == NULL)
+ return REG_ESPACE;
+ }
+
+ /* Dispatch to the appropriate matcher. */
+ if (tnfa->have_backrefs)
+ {
+ /* The regex has back references, use the backtracking matcher. */
+ status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
+ }
+ else
+ {
+ /* Exact matching, no back references, use the parallel matcher. */
+ status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
+ }
+
+ if (status == REG_OK)
+ /* A match was found, so fill the submatch registers. */
+ tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
+ if (tags)
+ xfree(tags);
+ return status;
+}
diff --git a/libc-top-half/musl/src/regex/tre-mem.c b/libc-top-half/musl/src/regex/tre-mem.c
new file mode 100644
index 0000000..86f809d
--- /dev/null
+++ b/libc-top-half/musl/src/regex/tre-mem.c
@@ -0,0 +1,158 @@
+/*
+ tre-mem.c - TRE memory allocator
+
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+/*
+ This memory allocator is for allocating small memory blocks efficiently
+ in terms of memory overhead and execution speed. The allocated blocks
+ cannot be freed individually, only all at once. There can be multiple
+ allocators, though.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "tre.h"
+
+/*
+ This memory allocator is for allocating small memory blocks efficiently
+ in terms of memory overhead and execution speed. The allocated blocks
+ cannot be freed individually, only all at once. There can be multiple
+ allocators, though.
+*/
+
+/* Returns a new memory allocator or NULL if out of memory. */
+tre_mem_t
+tre_mem_new_impl(int provided, void *provided_block)
+{
+ tre_mem_t mem;
+ if (provided)
+ {
+ mem = provided_block;
+ memset(mem, 0, sizeof(*mem));
+ }
+ else
+ mem = xcalloc(1, sizeof(*mem));
+ if (mem == NULL)
+ return NULL;
+ return mem;
+}
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+void
+tre_mem_destroy(tre_mem_t mem)
+{
+ tre_list_t *tmp, *l = mem->blocks;
+
+ while (l != NULL)
+ {
+ xfree(l->data);
+ tmp = l->next;
+ xfree(l);
+ l = tmp;
+ }
+ xfree(mem);
+}
+
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. */
+void *
+tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+ int zero, size_t size)
+{
+ void *ptr;
+
+ if (mem->failed)
+ {
+ return NULL;
+ }
+
+ if (mem->n < size)
+ {
+ /* We need more memory than is available in the current block.
+ Allocate a new block. */
+ tre_list_t *l;
+ if (provided)
+ {
+ if (provided_block == NULL)
+ {
+ mem->failed = 1;
+ return NULL;
+ }
+ mem->ptr = provided_block;
+ mem->n = TRE_MEM_BLOCK_SIZE;
+ }
+ else
+ {
+ int block_size;
+ if (size * 8 > TRE_MEM_BLOCK_SIZE)
+ block_size = size * 8;
+ else
+ block_size = TRE_MEM_BLOCK_SIZE;
+ l = xmalloc(sizeof(*l));
+ if (l == NULL)
+ {
+ mem->failed = 1;
+ return NULL;
+ }
+ l->data = xmalloc(block_size);
+ if (l->data == NULL)
+ {
+ xfree(l);
+ mem->failed = 1;
+ return NULL;
+ }
+ l->next = NULL;
+ if (mem->current != NULL)
+ mem->current->next = l;
+ if (mem->blocks == NULL)
+ mem->blocks = l;
+ mem->current = l;
+ mem->ptr = l->data;
+ mem->n = block_size;
+ }
+ }
+
+ /* Make sure the next pointer will be aligned. */
+ size += ALIGN(mem->ptr + size, long);
+
+ /* Allocate from current block. */
+ ptr = mem->ptr;
+ mem->ptr += size;
+ mem->n -= size;
+
+ /* Set to zero if needed. */
+ if (zero)
+ memset(ptr, 0, size);
+
+ return ptr;
+}
diff --git a/libc-top-half/musl/src/regex/tre.h b/libc-top-half/musl/src/regex/tre.h
new file mode 100644
index 0000000..aed64f3
--- /dev/null
+++ b/libc-top-half/musl/src/regex/tre.h
@@ -0,0 +1,233 @@
+/*
+ tre-internal.h - TRE internal definitions
+
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <regex.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#undef TRE_MBSTATE
+
+#ifdef __wasilibc_unmodified_upstream // Allow NDEBUG to be predefined.
+#define NDEBUG
+#endif
+
+#define TRE_REGEX_T_FIELD __opaque
+typedef int reg_errcode_t;
+
+typedef wchar_t tre_char_t;
+
+#define DPRINT(msg) do { } while(0)
+
+#define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
+
+#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
+
+/* Wide characters. */
+typedef wint_t tre_cint_t;
+#define TRE_CHAR_MAX 0x10ffff
+
+#define tre_isalnum iswalnum
+#define tre_isalpha iswalpha
+#define tre_isblank iswblank
+#define tre_iscntrl iswcntrl
+#define tre_isdigit iswdigit
+#define tre_isgraph iswgraph
+#define tre_islower iswlower
+#define tre_isprint iswprint
+#define tre_ispunct iswpunct
+#define tre_isspace iswspace
+#define tre_isupper iswupper
+#define tre_isxdigit iswxdigit
+
+#define tre_tolower towlower
+#define tre_toupper towupper
+#define tre_strlen wcslen
+
+/* Use system provided iswctype() and wctype(). */
+typedef wctype_t tre_ctype_t;
+#define tre_isctype iswctype
+#define tre_ctype wctype
+
+/* Returns number of bytes to add to (char *)ptr to make it
+ properly aligned for the type. */
+#define ALIGN(ptr, type) \
+ ((((long)ptr) % sizeof(type)) \
+ ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
+ : 0)
+
+#undef MAX
+#undef MIN
+#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
+#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
+
+/* TNFA transition type. A TNFA state is an array of transitions,
+ the terminator is a transition with NULL `state'. */
+typedef struct tnfa_transition tre_tnfa_transition_t;
+
+struct tnfa_transition {
+ /* Range of accepted characters. */
+ tre_cint_t code_min;
+ tre_cint_t code_max;
+ /* Pointer to the destination state. */
+ tre_tnfa_transition_t *state;
+ /* ID number of the destination state. */
+ int state_id;
+ /* -1 terminated array of tags (or NULL). */
+ int *tags;
+ /* Assertion bitmap. */
+ int assertions;
+ /* Assertion parameters. */
+ union {
+ /* Character class assertion. */
+ tre_ctype_t class;
+ /* Back reference assertion. */
+ int backref;
+ } u;
+ /* Negative character class assertions. */
+ tre_ctype_t *neg_classes;
+};
+
+
+/* Assertions. */
+#define ASSERT_AT_BOL 1 /* Beginning of line. */
+#define ASSERT_AT_EOL 2 /* End of line. */
+#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */
+#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */
+#define ASSERT_AT_BOW 16 /* Beginning of word. */
+#define ASSERT_AT_EOW 32 /* End of word. */
+#define ASSERT_AT_WB 64 /* Word boundary. */
+#define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */
+#define ASSERT_BACKREF 256 /* A back reference in `backref'. */
+#define ASSERT_LAST 256
+
+/* Tag directions. */
+typedef enum {
+ TRE_TAG_MINIMIZE = 0,
+ TRE_TAG_MAXIMIZE = 1
+} tre_tag_direction_t;
+
+/* Instructions to compute submatch register values from tag values
+ after a successful match. */
+struct tre_submatch_data {
+ /* Tag that gives the value for rm_so (submatch start offset). */
+ int so_tag;
+ /* Tag that gives the value for rm_eo (submatch end offset). */
+ int eo_tag;
+ /* List of submatches this submatch is contained in. */
+ int *parents;
+};
+
+typedef struct tre_submatch_data tre_submatch_data_t;
+
+
+/* TNFA definition. */
+typedef struct tnfa tre_tnfa_t;
+
+struct tnfa {
+ tre_tnfa_transition_t *transitions;
+ unsigned int num_transitions;
+ tre_tnfa_transition_t *initial;
+ tre_tnfa_transition_t *final;
+ tre_submatch_data_t *submatch_data;
+ char *firstpos_chars;
+ int first_char;
+ unsigned int num_submatches;
+ tre_tag_direction_t *tag_directions;
+ int *minimal_tags;
+ int num_tags;
+ int num_minimals;
+ int end_tag;
+ int num_states;
+ int cflags;
+ int have_backrefs;
+ int have_approx;
+};
+
+/* from tre-mem.h: */
+
+#define TRE_MEM_BLOCK_SIZE 1024
+
+typedef struct tre_list {
+ void *data;
+ struct tre_list *next;
+} tre_list_t;
+
+typedef struct tre_mem_struct {
+ tre_list_t *blocks;
+ tre_list_t *current;
+ char *ptr;
+ size_t n;
+ int failed;
+ void **provided;
+} *tre_mem_t;
+
+#define tre_mem_new_impl __tre_mem_new_impl
+#define tre_mem_alloc_impl __tre_mem_alloc_impl
+#define tre_mem_destroy __tre_mem_destroy
+
+hidden tre_mem_t tre_mem_new_impl(int provided, void *provided_block);
+hidden void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
+ int zero, size_t size);
+
+/* Returns a new memory allocator or NULL if out of memory. */
+#define tre_mem_new() tre_mem_new_impl(0, NULL)
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. */
+#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size)
+
+/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the
+ allocated block or NULL if an underlying malloc() failed. The memory
+ is set to zero. */
+#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size)
+
+#ifdef TRE_USE_ALLOCA
+/* alloca() versions. Like above, but memory is allocated with alloca()
+ instead of malloc(). */
+
+#define tre_mem_newa() \
+ tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct)))
+
+#define tre_mem_alloca(mem, size) \
+ ((mem)->n >= (size) \
+ ? tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \
+ : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size)))
+#endif /* TRE_USE_ALLOCA */
+
+
+/* Frees the memory allocator and all memory allocated with it. */
+hidden void tre_mem_destroy(tre_mem_t mem);
+
+#define xmalloc malloc
+#define xcalloc calloc
+#define xfree free
+#define xrealloc realloc
+