diff options
Diffstat (limited to 'include/ap_regex.h')
-rw-r--r-- | include/ap_regex.h | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/include/ap_regex.h b/include/ap_regex.h new file mode 100644 index 0000000..50d5aba --- /dev/null +++ b/include/ap_regex.h @@ -0,0 +1,296 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This code is based on pcreposix.h from the PCRE Library distribution, + * as originally written by Philip Hazel <ph10@cam.ac.uk>, and forked by + * the Apache HTTP Server project to provide POSIX-style regex function + * wrappers around underlying PCRE library functions for httpd. + * + * The original source file pcreposix.h is copyright and licensed as follows; + + Copyright (c) 1997-2004 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/** + * @file ap_regex.h + * @brief Apache Regex defines + */ + +#ifndef AP_REGEX_H +#define AP_REGEX_H + +#include "apr.h" + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Options for ap_regcomp, ap_regexec, and ap_rxplus versions: */ + +#define AP_REG_ICASE 0x01 /** use a case-insensitive match */ +#define AP_REG_NEWLINE 0x02 /** don't match newlines against '.' etc */ +#define AP_REG_NOTBOL 0x04 /** ^ will not match against start-of-string */ +#define AP_REG_NOTEOL 0x08 /** $ will not match against end-of-string */ + +#define AP_REG_EXTENDED (0) /** unused */ +#define AP_REG_NOSUB (0) /** unused */ + +#define AP_REG_MULTI 0x10 /* perl's /g (needs fixing) */ +#define AP_REG_NOMEM 0x20 /* nomem in our code */ +#define AP_REG_DOTALL 0x40 /* perl's /s flag */ + +#define AP_REG_DOLLAR_ENDONLY 0x200 /* '$' matches at end of subject string only */ + +#define AP_REG_NO_DEFAULT 0x400 /**< Don't implicitely add AP_REG_DEFAULT options */ + +#define AP_REG_MATCH "MATCH_" /**< suggested prefix for ap_regname */ + +#define AP_REG_DEFAULT (AP_REG_DOTALL|AP_REG_DOLLAR_ENDONLY) + +/* Arguments for ap_pcre_version_string */ +enum { + AP_REG_PCRE_COMPILED = 0, /** PCRE version used during program compilation */ + AP_REG_PCRE_LOADED /** PCRE version loaded at runtime */ +}; + +/* Error values: */ +enum { + AP_REG_ASSERT = 1, /** internal error ? */ + AP_REG_ESPACE, /** failed to get memory */ + AP_REG_INVARG, /** invalid argument */ + AP_REG_NOMATCH /** match failed */ +}; + +/* The structure representing a compiled regular expression. */ +typedef struct { + void *re_pcre; + int re_nsub; + apr_size_t re_erroffset; +} ap_regex_t; + +/* The structure in which a captured offset is returned. */ +typedef struct { + int rm_so; + int rm_eo; +} ap_regmatch_t; + +/* The functions */ + +/** + * Return PCRE version string. + * @param which Either AP_REG_PCRE_COMPILED (PCRE version used + * during program compilation) or AP_REG_PCRE_LOADED + * (PCRE version used at runtime) + * @return The PCRE version string + */ +AP_DECLARE(const char *) ap_pcre_version_string(int which); + +/** + * Get default compile flags + * @return Bitwise OR of AP_REG_* flags + */ +AP_DECLARE(int) ap_regcomp_get_default_cflags(void); + +/** + * Set default compile flags + * @param cflags Bitwise OR of AP_REG_* flags + */ +AP_DECLARE(void) ap_regcomp_set_default_cflags(int cflags); + +/** + * Get the AP_REG_* corresponding to the string. + * @param name The name (i.e. AP_REG_<name>) + * @return The AP_REG_*, or zero if the string is unknown + * + */ +AP_DECLARE(int) ap_regcomp_default_cflag_by_name(const char *name); + +/** + * Compile a regular expression. + * @param preg Returned compiled regex + * @param regex The regular expression string + * @param cflags Bitwise OR of AP_REG_* flags (ICASE and NEWLINE supported, + * other flags are ignored) + * @return Zero on success or non-zero on error + */ +AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *regex, int cflags); + +/** + * Match a NUL-terminated string against a pre-compiled regex. + * @param preg The pre-compiled regex + * @param string The string to match + * @param nmatch Provide information regarding the location of any matches + * @param pmatch Provide information regarding the location of any matches + * @param eflags Bitwise OR of AP_REG_* flags (NOTBOL and NOTEOL supported, + * other flags are ignored) + * @return 0 for successful match, \p AP_REG_NOMATCH otherwise + */ +AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string, + apr_size_t nmatch, ap_regmatch_t *pmatch, int eflags); + +/** + * Match a string with given length against a pre-compiled regex. The string + * does not need to be NUL-terminated. + * @param preg The pre-compiled regex + * @param buff The string to match + * @param len Length of the string to match + * @param nmatch Provide information regarding the location of any matches + * @param pmatch Provide information regarding the location of any matches + * @param eflags Bitwise OR of AP_REG_* flags (NOTBOL and NOTEOL supported, + * other flags are ignored) + * @return 0 for successful match, AP_REG_NOMATCH otherwise + */ +AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, + apr_size_t len, apr_size_t nmatch, + ap_regmatch_t *pmatch, int eflags); + +/** + * Return the error code returned by regcomp or regexec into error messages + * @param errcode the error code returned by regexec or regcomp + * @param preg The precompiled regex + * @param errbuf A buffer to store the error in + * @param errbuf_size The size of the buffer + */ +AP_DECLARE(apr_size_t) ap_regerror(int errcode, const ap_regex_t *preg, + char *errbuf, apr_size_t errbuf_size); + +/** + * Return an array of named regex backreferences + * @param preg The precompiled regex + * @param names The array to which the names will be added + * @param prefix An optional prefix to add to the returned names. AP_REG_MATCH + * is the recommended prefix. + * @param upper If non zero, uppercase the names + */ +AP_DECLARE(int) ap_regname(const ap_regex_t *preg, + apr_array_header_t *names, const char *prefix, + int upper); + +/** Destroy a pre-compiled regex. + * @param preg The pre-compiled regex to free. + */ +AP_DECLARE(void) ap_regfree(ap_regex_t *preg); + +/* ap_rxplus: higher-level regexps */ + +typedef struct { + ap_regex_t rx; + apr_uint32_t flags; + const char *subs; + const char *match; + apr_size_t nmatch; + ap_regmatch_t *pmatch; +} ap_rxplus_t; + +/** + * Compile a pattern into a regexp. + * supports perl-like formats + * match-string + * /match-string/flags + * s/match-string/replacement-string/flags + * Intended to support more perl-like stuff as and when round tuits happen + * match-string is anything supported by ap_regcomp + * replacement-string is a substitution string as supported in ap_pregsub + * flags should correspond with perl syntax: treat failure to do so as a bug + * (documentation TBD) + * @param pool Pool to allocate from + * @param pattern Pattern to compile + * @return Compiled regexp, or NULL in case of compile/syntax error + */ +AP_DECLARE(ap_rxplus_t*) ap_rxplus_compile(apr_pool_t *pool, const char *pattern); +/** + * Apply a regexp operation to a string. + * @param pool Pool to allocate from + * @param rx The regex match to apply + * @param pattern The string to apply it to + * NOTE: This MUST be kept in scope to use regexp memory + * @param newpattern The modified string (ignored if the operation doesn't + * modify the string) + * @return Number of times a match happens. Normally 0 (no match) or 1 + * (match found), but may be greater if a transforming pattern + * is applied with the 'g' flag. + */ +AP_DECLARE(int) ap_rxplus_exec(apr_pool_t *pool, ap_rxplus_t *rx, + const char *pattern, char **newpattern); +#ifdef DOXYGEN +/** + * Number of matches in the regexp operation's memory + * This may be 0 if no match is in memory, or up to nmatch from compilation + * @param rx The regexp + * @return Number of matches in memory + */ +AP_DECLARE(int) ap_rxplus_nmatch(ap_rxplus_t *rx); +#else +#define ap_rxplus_nmatch(rx) (((rx)->match != NULL) ? (rx)->nmatch : 0) +#endif +/** + * Get a pointer to a match from regex memory + * NOTE: this relies on the match pattern from the last call to + * ap_rxplus_exec still being valid (i.e. not freed or out-of-scope) + * @param rx The regexp + * @param n The match number to retrieve (must be between 0 and nmatch) + * @param len Returns the length of the match. + * @param match Returns the match pattern + */ +AP_DECLARE(void) ap_rxplus_match(ap_rxplus_t *rx, int n, int *len, + const char **match); +/** + * Get a match from regex memory in a string copy + * NOTE: this relies on the match pattern from the last call to + * ap_rxplus_exec still being valid (i.e. not freed or out-of-scope) + * @param pool Pool to allocate from + * @param rx The regexp + * @param n The match number to retrieve (must be between 0 and nmatch) + * @return The matched string + */ +AP_DECLARE(char*) ap_rxplus_pmatch(apr_pool_t *pool, ap_rxplus_t *rx, int n); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* AP_REGEX_T */ + |