summaryrefslogtreecommitdiffstats
path: root/src/lib/regex.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/regex.c390
1 files changed, 390 insertions, 0 deletions
diff --git a/src/lib/regex.c b/src/lib/regex.c
new file mode 100644
index 0000000..64a6dbe
--- /dev/null
+++ b/src/lib/regex.c
@@ -0,0 +1,390 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+/**
+ * @file lib/regex.c
+ * @brief regex abstraction functions
+ *
+ * @copyright 2014 The FreeRADIUS server project
+ * @copyright 2014 Arran Cudbard-Bell <a.cudbardb@freeradius.org>
+ */
+
+#ifdef HAVE_REGEX
+#include <freeradius-devel/libradius.h>
+#include <freeradius-devel/regex.h>
+
+/*
+ * Wrapper functions for libpcre. Much more powerful, and guaranteed
+ * to be binary safe but require libpcre.
+ */
+# ifdef HAVE_PCRE
+/** Free regex_t structure
+ *
+ * Calls libpcre specific free functions for the expression and study.
+ *
+ * @param preg to free.
+ */
+static int _regex_free(regex_t *preg)
+{
+ if (preg->compiled) {
+ pcre_free(preg->compiled);
+ preg->compiled = NULL;
+ }
+
+ if (preg->extra) {
+#ifdef PCRE_CONFIG_JIT
+ pcre_free_study(preg->extra);
+#else
+ pcre_free(preg->extra);
+#endif
+ preg->extra = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * Replace the libpcre malloc and free functions with
+ * talloc wrappers. This allows us to use the subcapture copy
+ * functions and just reparent the memory allocated.
+ */
+static void *_pcre_malloc(size_t to_alloc)
+{
+ return talloc_array(NULL, uint8_t, to_alloc);
+}
+
+static void _pcre_free(void *to_free)
+{
+ talloc_free(to_free);
+}
+
+/** Wrapper around pcre_compile
+ *
+ * Allows the rest of the code to do compilations using one function signature.
+ *
+ * @note Compiled expression must be freed with talloc_free.
+ *
+ * @param out Where to write out a pointer to the structure containing the compiled expression.
+ * @param pattern to compile.
+ * @param len of pattern.
+ * @param ignore_case whether to do case insensitive matching.
+ * @param multiline If true $ matches newlines.
+ * @param subcaptures Whether to compile the regular expression to store subcapture
+ * data.
+ * @param runtime If false run the pattern through the PCRE JIT to convert it to machine code.
+ * This trades startup time (longer) for runtime performance (better).
+ * @return >= 1 on success, <= 0 on error. Negative value is offset of parse error.
+ */
+ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
+ bool ignore_case, bool multiline, bool subcaptures, bool runtime)
+{
+ char const *error;
+ int offset;
+ int cflags = 0;
+ regex_t *preg;
+
+ static bool setup = false;
+
+ /*
+ * Lets us use subcapture copy
+ */
+ if (!setup) {
+ pcre_malloc = _pcre_malloc;
+ pcre_free = _pcre_free;
+ setup = true;
+ }
+
+ *out = NULL;
+
+ if (len == 0) {
+ fr_strerror_printf("Empty expression");
+ return 0;
+ }
+
+ if (ignore_case) cflags |= PCRE_CASELESS;
+ if (multiline) cflags |= PCRE_MULTILINE;
+ if (!subcaptures) cflags |= PCRE_NO_AUTO_CAPTURE;
+
+ preg = talloc_zero(ctx, regex_t);
+ talloc_set_destructor(preg, _regex_free);
+
+ preg->compiled = pcre_compile(pattern, cflags, &error, &offset, NULL);
+ if (!preg->compiled) {
+ talloc_free(preg);
+ fr_strerror_printf("Pattern compilation failed: %s", error);
+
+ return -(ssize_t)offset;
+ }
+ if (!runtime) {
+ preg->precompiled = true;
+ preg->extra = pcre_study(preg->compiled, PCRE_STUDY_JIT_COMPILE, &error);
+ if (error) {
+ talloc_free(preg);
+ fr_strerror_printf("Pattern study failed: %s", error);
+
+ return 0;
+ }
+ }
+
+ *out = preg;
+
+ return len;
+}
+
+static const FR_NAME_NUMBER regex_pcre_error_str[] = {
+ { "PCRE_ERROR_NOMATCH", PCRE_ERROR_NOMATCH },
+ { "PCRE_ERROR_NULL", PCRE_ERROR_NULL },
+ { "PCRE_ERROR_BADOPTION", PCRE_ERROR_BADOPTION },
+ { "PCRE_ERROR_BADMAGIC", PCRE_ERROR_BADMAGIC },
+ { "PCRE_ERROR_UNKNOWN_OPCODE", PCRE_ERROR_UNKNOWN_OPCODE },
+ { "PCRE_ERROR_NOMEMORY", PCRE_ERROR_NOMEMORY },
+ { "PCRE_ERROR_NOSUBSTRING", PCRE_ERROR_NOSUBSTRING },
+ { "PCRE_ERROR_MATCHLIMIT", PCRE_ERROR_MATCHLIMIT },
+ { "PCRE_ERROR_CALLOUT", PCRE_ERROR_CALLOUT },
+ { "PCRE_ERROR_BADUTF8", PCRE_ERROR_BADUTF8 },
+ { "PCRE_ERROR_BADUTF8_OFFSET", PCRE_ERROR_BADUTF8_OFFSET },
+ { "PCRE_ERROR_PARTIAL", PCRE_ERROR_PARTIAL },
+ { "PCRE_ERROR_BADPARTIAL", PCRE_ERROR_BADPARTIAL },
+ { "PCRE_ERROR_INTERNAL", PCRE_ERROR_INTERNAL },
+ { "PCRE_ERROR_BADCOUNT", PCRE_ERROR_BADCOUNT },
+ { "PCRE_ERROR_DFA_UITEM", PCRE_ERROR_DFA_UITEM },
+ { "PCRE_ERROR_DFA_UCOND", PCRE_ERROR_DFA_UCOND },
+ { "PCRE_ERROR_DFA_UMLIMIT", PCRE_ERROR_DFA_UMLIMIT },
+ { "PCRE_ERROR_DFA_WSSIZE", PCRE_ERROR_DFA_WSSIZE },
+ { "PCRE_ERROR_DFA_RECURSE", PCRE_ERROR_DFA_RECURSE },
+ { "PCRE_ERROR_RECURSIONLIMIT", PCRE_ERROR_RECURSIONLIMIT },
+ { "PCRE_ERROR_NULLWSLIMIT", PCRE_ERROR_NULLWSLIMIT },
+ { "PCRE_ERROR_BADNEWLINE", PCRE_ERROR_BADNEWLINE },
+ { NULL, 0 }
+};
+
+/** Wrapper around pcre_exec
+ *
+ * @param preg The compiled expression.
+ * @param subject to match.
+ * @param len Length of subject.
+ * @param pmatch Array of match pointers.
+ * @param nmatch How big the match array is. Updated to number of matches.
+ * @return -1 on error, 0 on no match, 1 on match.
+ */
+int regex_exec(regex_t *preg, char const *subject, size_t len, regmatch_t pmatch[], size_t *nmatch)
+{
+ int ret;
+ size_t matches;
+
+ /*
+ * PCRE_NO_AUTO_CAPTURE is a compile time only flag,
+ * and can't be passed here.
+ * We rely on the fact that matches has been set to
+ * 0 as a hint that no subcapture data should be
+ * generated.
+ */
+ if (!pmatch || !nmatch) {
+ pmatch = NULL;
+ if (nmatch) *nmatch = 0;
+ matches = 0;
+ } else {
+ matches = *nmatch;
+ }
+
+ ret = pcre_exec(preg->compiled, preg->extra, subject, len, 0, 0, (int *)pmatch, matches * 3);
+ if (ret < 0) {
+ if (ret == PCRE_ERROR_NOMATCH) return 0;
+
+ fr_strerror_printf("regex evaluation failed with code (%i): %s", ret,
+ fr_int2str(regex_pcre_error_str, ret, "<INVALID>"));
+ return -1;
+ }
+
+ /*
+ * 0 signifies more offsets than we provided space for,
+ * so don't touch nmatches.
+ */
+ if (nmatch && (ret > 0)) *nmatch = ret;
+
+ return 1;
+}
+/*
+ * Wrapper functions for POSIX like, and extended regular
+ * expressions. These use the system regex library.
+ */
+# else
+/** Free heap allocated regex_t structure
+ *
+ * Heap allocation of regex_t is needed so regex_compile has the same signature with
+ * POSIX or libpcre.
+ *
+ * @param preg to free.
+ */
+static int _regex_free(regex_t *preg)
+{
+ regfree(preg);
+
+ return 0;
+}
+
+/** Binary safe wrapper around regcomp
+ *
+ * If we have the BSD extensions we don't need to do any special work
+ * if we don't have the BSD extensions we need to check to see if the
+ * regular expression contains any \0 bytes.
+ *
+ * If it does we fail and print the appropriate error message.
+ *
+ * @note Compiled expression must be freed with talloc_free.
+ *
+ * @param ctx To allocate memory in.
+ * @param out Where to write out a pointer to the structure containing the
+ * compiled expression.
+ * @param pattern to compile.
+ * @param len of pattern.
+ * @param ignore_case Whether the match should be case ignore_case.
+ * @param multiline If true $ matches newlines.
+ * @param subcaptures Whether to compile the regular expression to store subcapture
+ * data.
+ * @param runtime Whether the compilation is being done at runtime.
+ * @return >= 1 on success, <= 0 on error. Negative value is offset of parse error.
+ * With POSIX regex we only give the correct offset for embedded \0 errors.
+ */
+ssize_t regex_compile(TALLOC_CTX *ctx, regex_t **out, char const *pattern, size_t len,
+ bool ignore_case, bool multiline, bool subcaptures, UNUSED bool runtime)
+{
+ int ret;
+ int cflags = REG_EXTENDED;
+ regex_t *preg;
+
+ if (len == 0) {
+ fr_strerror_printf("Empty expression");
+ return 0;
+ }
+
+ if (ignore_case) cflags |= REG_ICASE;
+ if (multiline) cflags |= REG_NEWLINE;
+ if (!subcaptures) cflags |= REG_NOSUB;
+
+#ifndef HAVE_REGNCOMP
+ {
+ char const *p;
+
+ p = pattern;
+ p += strlen(pattern);
+
+ if ((size_t)(p - pattern) != len) {
+ fr_strerror_printf("Found null in pattern at offset %zu. Pattern unsafe for compilation",
+ (p - pattern));
+ return -(p - pattern);
+ }
+
+ preg = talloc_zero(ctx, regex_t);
+ if (!preg) return 0;
+
+ ret = regcomp(preg, pattern, cflags);
+ }
+#else
+ preg = talloc_zero(ctx, regex_t);
+ if (!preg) return 0;
+ ret = regncomp(preg, pattern, len, cflags);
+#endif
+ if (ret != 0) {
+ char errbuf[128];
+
+ regerror(ret, preg, errbuf, sizeof(errbuf));
+ fr_strerror_printf("Pattern compilation failed: %s", errbuf);
+
+ talloc_free(preg);
+
+ return 0; /* POSIX expressions don't give us the failure offset */
+ }
+
+ talloc_set_destructor(preg, _regex_free);
+ *out = preg;
+
+ return len;
+}
+
+/** Binary safe wrapper around regexec
+ *
+ * If we have the BSD extensions we don't need to do any special work
+ * If we don't have the BSD extensions we need to check to see if the
+ * value to be compared contains any \0 bytes.
+ *
+ * If it does, we fail and print the appropriate error message.
+ *
+ * @param preg The compiled expression.
+ * @param subject to match.
+ * @param pmatch Array of match pointers.
+ * @param nmatch How big the match array is. Updated to number of matches.
+ * @return -1 on error, 0 on no match, 1 on match.
+ */
+int regex_exec(regex_t *preg, char const *subject, size_t len, regmatch_t pmatch[], size_t *nmatch)
+{
+ int ret;
+ size_t matches;
+
+ /*
+ * Disable capturing
+ */
+ if (!pmatch || !nmatch) {
+ pmatch = NULL;
+ if (nmatch) *nmatch = 0;
+ matches = 0;
+ } else {
+ /* regexec does not seem to initialise unused elements */
+ matches = *nmatch;
+ memset(pmatch, 0, sizeof(pmatch[0]) * matches);
+ }
+
+#ifndef HAVE_REGNEXEC
+ {
+ char const *p;
+
+ p = subject;
+ p += strlen(subject);
+
+ if ((size_t)(p - subject) != len) {
+ fr_strerror_printf("Found null in subject at offset %zu. String unsafe for evaluation",
+ (p - subject));
+ return -1;
+ }
+ ret = regexec(preg, subject, matches, pmatch, 0);
+ }
+#else
+ ret = regnexec(preg, subject, len, matches, pmatch, 0);
+#endif
+ if (ret != 0) {
+ if (ret != REG_NOMATCH) {
+ char errbuf[128];
+
+ regerror(ret, preg, errbuf, sizeof(errbuf));
+
+ fr_strerror_printf("regex evaluation failed: %s", errbuf);
+ if (nmatch) *nmatch = 0;
+ return -1;
+ }
+ return 0;
+ }
+
+ /*
+ * Update *nmatch to be the maximum number of
+ * groups that *could* have been populated,
+ * need to check them later.
+ */
+ if (nmatch && (*nmatch > preg->re_nsub)) *nmatch = preg->re_nsub + 1;
+
+ return 1;
+}
+# endif
+#endif