diff options
Diffstat (limited to 'man3/regex.3')
-rw-r--r-- | man3/regex.3 | 412 |
1 files changed, 412 insertions, 0 deletions
diff --git a/man3/regex.3 b/man3/regex.3 new file mode 100644 index 0000000..fe6a6b3 --- /dev/null +++ b/man3/regex.3 @@ -0,0 +1,412 @@ +'\" t +.\" Copyright (C), 1995, Graeme W. Wilford. (Wilf.) +.\" Copyright 2023, Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz> +.\" Copyright 2023, Alejandro Colomar <alx@kernel.org> +.\" +.\" SPDX-License-Identifier: Linux-man-pages-copyleft +.\" +.\" Wed Jun 14 16:10:28 BST 1995 Wilf. (G.Wilford@ee.surrey.ac.uk) +.\" Tiny change in formatting - aeb, 950812 +.\" Modified 8 May 1998 by Joseph S. Myers (jsm28@cam.ac.uk) +.\" +.\" show the synopsis section nicely +.TH regex 3 2023-07-20 "Linux man-pages 6.05.01" +.SH NAME +regcomp, regexec, regerror, regfree \- POSIX regex functions +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.B #include <regex.h> +.PP +.BI "int regcomp(regex_t *restrict " preg ", const char *restrict " regex , +.BI " int " cflags ); +.BI "int regexec(const regex_t *restrict " preg \ +", const char *restrict " string , +.BI " size_t " nmatch ", \ +regmatch_t " pmatch "[_Nullable restrict ." nmatch ], +.BI " int " eflags ); +.PP +.BI "size_t regerror(int " errcode ", const regex_t *_Nullable restrict " preg , +.BI " char " errbuf "[_Nullable restrict ." errbuf_size ], +.BI " size_t " errbuf_size ); +.BI "void regfree(regex_t *" preg ); +.PP +.B typedef struct { +.B " size_t re_nsub;" +.B } regex_t; +.PP +.B typedef struct { +.B " regoff_t rm_so;" +.B " regoff_t rm_eo;" +.B } regmatch_t; +.PP +.BR typedef " /* ... */ " regoff_t; +.fi +.SH DESCRIPTION +.SS Compilation +.BR regcomp () +is used to compile a regular expression into a form that is suitable +for subsequent +.BR regexec () +searches. +.PP +On success, the pattern buffer at +.I *preg +is initialized. +.I regex +is a null-terminated string. +The locale must be the same when running +.BR regexec (). +.PP +After +.BR regcomp () +succeeds, +.I preg->re_nsub +holds the number of subexpressions in +.IR regex . 
+Thus, a value of +.I preg->re_nsub ++ 1 +passed as +.I nmatch +to +.BR regexec () +is sufficient to capture all matches. +.PP +.I cflags +is the +bitwise OR +of zero or more of the following: +.TP +.B REG_EXTENDED +Use +POSIX +Extended Regular Expression syntax when interpreting +.IR regex . +If not set, +POSIX +Basic Regular Expression syntax is used. +.TP +.B REG_ICASE +Do not differentiate case. +Subsequent +.BR regexec () +searches using this pattern buffer will be case insensitive. +.TP +.B REG_NOSUB +Report only overall success. +.BR regexec () +will use only +.I pmatch +for +.BR REG_STARTEND , +ignoring +.IR nmatch . +.TP +.B REG_NEWLINE +Match-any-character operators don't match a newline. +.IP +A nonmatching list +.RB ( [\[ha]...\&] ) +not containing a newline does not match a newline. +.IP +Match-beginning-of-line operator +.RB ( \[ha] ) +matches the empty string immediately after a newline, regardless of +whether +.IR eflags , +the execution flags of +.BR regexec (), +contains +.BR REG_NOTBOL . +.IP +Match-end-of-line operator +.RB ( $ ) +matches the empty string immediately before a newline, regardless of +whether +.I eflags +contains +.BR REG_NOTEOL . +.SS Matching +.BR regexec () +is used to match a null-terminated string +against the compiled pattern buffer in +.IR *preg , +which must have been initialized with +.BR regcomp (). +.I eflags +is the +bitwise OR +of zero or more of the following flags: +.TP +.B REG_NOTBOL +The match-beginning-of-line operator always fails to match (but see the +compilation flag +.B REG_NEWLINE +above). +This flag may be used when different portions of a string are passed to +.BR regexec () +and the beginning of the string should not be interpreted as the +beginning of the line. +.TP +.B REG_NOTEOL +The match-end-of-line operator always fails to match (but see the +compilation flag +.B REG_NEWLINE +above). 
+.TP +.B REG_STARTEND +Match +.RI [ "string + pmatch[0].rm_so" , " string + pmatch[0].rm_eo" ) +instead of +.RI [ string , " string + strlen(string)" ). +This allows matching embedded NUL bytes +and avoids a +.BR strlen (3) +on known-length strings. +If any matches are returned +.RB ( REG_NOSUB +wasn't passed to +.BR regcomp (), +the match succeeded, and +.I nmatch +> 0), they overwrite +.I pmatch +as usual, and the match offsets remain relative to +.I string +(not +.IR "string + pmatch[0].rm_so" ). +This flag is a BSD extension, not present in POSIX. +.SS Match offsets +Unless +.B REG_NOSUB +was passed to +.BR regcomp (), +it is possible to +obtain the locations of matches within +.IR string : +.BR regexec () +fills +.I nmatch +elements of +.I pmatch +with results: +.I pmatch[0] +corresponds to the entire match, +.I pmatch[1] +to the first subexpression, etc. +If there were more matches than +.IR nmatch , +they are discarded; +if fewer, +unused elements of +.I pmatch +are filled with +.BR \-1 s. +.PP +Each returned valid +.RB (non- \-1 ) +match corresponds to the range +.RI [ "string + rm_so" , " string + rm_eo" ). +.PP +.I regoff_t +is a signed integer type +capable of storing the largest value that can be stored in either a +.I ptrdiff_t +type or a +.I ssize_t +type. +.SS Error reporting +.BR regerror () +is used to turn the error codes that can be returned by both +.BR regcomp () +and +.BR regexec () +into error message strings. +.PP +If +.I preg +isn't a null pointer, +.I errcode +must be the latest error returned from an operation on +.IR preg . +.PP +If +.I errbuf_size +isn't 0, up to +.I errbuf_size +bytes are copied to +.IR errbuf ; +the error string is always null-terminated, and truncated to fit. +.SS Freeing +.BR regfree () +deinitializes the pattern buffer at +.IR *preg , +freeing any associated memory; +.I *preg +must have been initialized via +.BR regcomp (). 
+.SH RETURN VALUE +.BR regcomp () +returns zero for a successful compilation or an error code for failure. +.PP +.BR regexec () +returns zero for a successful match or +.B REG_NOMATCH +for failure. +.PP +.BR regerror () +returns the size of the buffer required to hold the string. +.SH ERRORS +The following errors can be returned by +.BR regcomp (): +.TP +.B REG_BADBR +Invalid use of back reference operator. +.TP +.B REG_BADPAT +Invalid use of pattern operators such as group or list. +.TP +.B REG_BADRPT +Invalid use of repetition operators such as using \[aq]*\[aq] +as the first character. +.TP +.B REG_EBRACE +Un-matched brace interval operators. +.TP +.B REG_EBRACK +Un-matched bracket list operators. +.TP +.B REG_ECOLLATE +Invalid collating element. +.TP +.B REG_ECTYPE +Unknown character class name. +.TP +.B REG_EEND +Nonspecific error. +This is not defined by POSIX. +.TP +.B REG_EESCAPE +Trailing backslash. +.TP +.B REG_EPAREN +Un-matched parenthesis group operators. +.TP +.B REG_ERANGE +Invalid use of the range operator; for example, the ending point of the range +occurs prior to the starting point. +.TP +.B REG_ESIZE +Compiled regular expression requires a pattern buffer larger than 64\ kB. +This is not defined by POSIX. +.TP +.B REG_ESPACE +The regex routines ran out of memory. +.TP +.B REG_ESUBREG +Invalid back reference to a subexpression. +.SH ATTRIBUTES +For an explanation of the terms used in this section, see +.BR attributes (7). +.TS +allbox; +lbx lb lb +l l l. +Interface Attribute Value +T{ +.na +.nh +.BR regcomp (), +.BR regexec () +T} Thread safety MT-Safe locale +T{ +.na +.nh +.BR regerror () +T} Thread safety MT-Safe env +T{ +.na +.nh +.BR regfree () +T} Thread safety MT-Safe +.TE +.sp 1 +.SH STANDARDS +POSIX.1-2008. +.SH HISTORY +POSIX.1-2001. +.PP +Prior to POSIX.1-2008, +.I regoff_t +was required to be +capable of storing the largest value that can be stored in either an +.I off_t +type or a +.I ssize_t +type. 
+.SH CAVEATS +.I re_nsub +is only required to be initialized if +.B REG_NOSUB +wasn't specified, but all known implementations initialize it regardless. +.\" glibc, musl, 4.4BSD, illumos +.PP +Both +.I regex_t +and +.I regmatch_t +may (and do) have more members, in any order. +Always reference them by name. +.\" illumos has two more start/end pairs and the first one is of pointers +.SH EXAMPLES +.EX +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <regex.h> +\& +#define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0])) +\& +static const char *const str = + "1) John Driverhacker;\en2) John Doe;\en3) John Foo;\en"; +static const char *const re = "John.*o"; +\& +int main(void) +{ + static const char *s = str; + regex_t regex; + regmatch_t pmatch[1]; + regoff_t off, len; +\& + if (regcomp(&regex, re, REG_NEWLINE)) + exit(EXIT_FAILURE); +\& + printf("String = \e"%s\e"\en", str); + printf("Matches:\en"); +\& + for (unsigned int i = 0; ; i++) { + if (regexec(&regex, s, ARRAY_SIZE(pmatch), pmatch, 0)) + break; +\& + off = pmatch[0].rm_so + (s \- str); + len = pmatch[0].rm_eo \- pmatch[0].rm_so; + printf("#%zu:\en", i); + printf("offset = %jd; length = %jd\en", (intmax_t) off, + (intmax_t) len); + printf("substring = \e"%.*s\e"\en", len, s + pmatch[0].rm_so); +\& + s += pmatch[0].rm_eo; + } +\& + exit(EXIT_SUCCESS); +} +.EE +.SH SEE ALSO +.BR grep (1), +.BR regex (7) +.PP +The glibc manual section, +.I "Regular Expressions" |