diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:43:11 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:43:11 +0000 |
commit | fc22b3d6507c6745911b9dfcc68f1e665ae13dbc (patch) | |
tree | ce1e3bce06471410239a6f41282e328770aa404a /upstream/mageia-cauldron/man3pm/re.3pm | |
parent | Initial commit. (diff) | |
download | manpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.tar.xz manpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.zip |
Adding upstream version 4.22.0.upstream/4.22.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'upstream/mageia-cauldron/man3pm/re.3pm')
-rw-r--r-- | upstream/mageia-cauldron/man3pm/re.3pm | 635 |
1 files changed, 635 insertions, 0 deletions
diff --git a/upstream/mageia-cauldron/man3pm/re.3pm b/upstream/mageia-cauldron/man3pm/re.3pm new file mode 100644 index 00000000..d3baa4ab --- /dev/null +++ b/upstream/mageia-cauldron/man3pm/re.3pm @@ -0,0 +1,635 @@ +.\" -*- mode: troff; coding: utf-8 -*- +.\" Automatically generated by Pod::Man 5.01 (Pod::Simple 3.43) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>. +.ie n \{\ +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds C` +. ds C' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is >0, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.\" +.\" Avoid warning from groff about undefined register 'F'. +.de IX +.. +.nr rF 0 +.if \n(.g .if rF .nr rF 1 +.if (\n(rF:(\n(.g==0)) \{\ +. if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. if !\nF==2 \{\ +. nr % 0 +. nr F 2 +. \} +. \} +.\} +.rr rF +.\" ======================================================================== +.\" +.IX Title "re 3pm" +.TH re 3pm 2023-11-28 "perl v5.38.2" "Perl Programmers Reference Guide" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. 
+.if n .ad l +.nh +.SH NAME +re \- Perl pragma to alter regular expression behaviour +.SH SYNOPSIS +.IX Header "SYNOPSIS" +.Vb 2 +\& use re \*(Aqtaint\*(Aq; +\& ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here +\& +\& $pat = \*(Aq(?{ $foo = 1 })\*(Aq; +\& use re \*(Aqeval\*(Aq; +\& /foo${pat}bar/; # won\*(Aqt fail (when not under \-T +\& # switch) +\& +\& { +\& no re \*(Aqtaint\*(Aq; # the default +\& ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here +\& +\& no re \*(Aqeval\*(Aq; # the default +\& /foo${pat}bar/; # disallowed (with or without \-T +\& # switch) +\& } +\& +\& use re \*(Aqstrict\*(Aq; # Raise warnings for more conditions +\& +\& use re \*(Aq/ix\*(Aq; +\& "FOO" =~ / foo /; # /ix implied +\& no re \*(Aq/x\*(Aq; +\& "FOO" =~ /foo/; # just /i implied +\& +\& use re \*(Aqdebug\*(Aq; # output debugging info during +\& /^(.*)$/s; # compile and run time +\& +\& +\& use re \*(Aqdebugcolor\*(Aq; # same as \*(Aqdebug\*(Aq, but with colored +\& # output +\& ... +\& +\& use re qw(Debug All); # Same as "use re \*(Aqdebug\*(Aq", but you +\& # can use "Debug" with things other +\& # than \*(AqAll\*(Aq +\& use re qw(Debug More); # \*(AqAll\*(Aq plus output more details +\& no re qw(Debug ALL); # Turn off (almost) all re debugging +\& # in this scope +\& +\& use re qw(is_regexp regexp_pattern); # import utility functions +\& my ($pat,$mods)=regexp_pattern(qr/foo/i); +\& if (is_regexp($obj)) { +\& print "Got regexp: ", +\& scalar regexp_pattern($obj); # just as perl would stringify +\& } # it but no hassle with blessed +\& # re\*(Aqs. +.Ve +.PP +(We use $^X in these examples because it's tainted by default.) +.SH DESCRIPTION +.IX Header "DESCRIPTION" +.SS "'taint' mode" +.IX Subsection "'taint' mode" +When \f(CW\*(C`use re \*(Aqtaint\*(Aq\*(C'\fR is in effect, and a tainted string is the target +of a regexp, the regexp memories (or values returned by the m// operator +in list context) are tainted. 
This feature is useful when regexp operations +on tainted data aren't meant to extract safe substrings, but to perform +other transformations. +.SS "'eval' mode" +.IX Subsection "'eval' mode" +When \f(CW\*(C`use re \*(Aqeval\*(Aq\*(C'\fR is in effect, a regexp is allowed to contain +\&\f(CW\*(C`(?{ ... })\*(C'\fR zero-width assertions and \f(CW\*(C`(??{ ... })\*(C'\fR postponed +subexpressions that are derived from variable interpolation, rather than +appearing literally within the regexp. That is normally disallowed, since +it is a +potential security risk. Note that this pragma is ignored when the regular +expression is obtained from tainted data, i.e. evaluation is always +disallowed with tainted regular expressions. See "(?{ code })" in perlre +and "(??{ code })" in perlre. +.PP +For the purpose of this pragma, interpolation of precompiled regular +expressions (i.e., the result of \f(CW\*(C`qr//\*(C'\fR) is \fInot\fR considered variable +interpolation. Thus: +.PP +.Vb 1 +\& /foo${pat}bar/ +.Ve +.PP +\&\fIis\fR allowed if \f(CW$pat\fR is a precompiled regular expression, even +if \f(CW$pat\fR contains \f(CW\*(C`(?{ ... })\*(C'\fR assertions or \f(CW\*(C`(??{ ... })\*(C'\fR subexpressions. +.SS "'strict' mode" +.IX Subsection "'strict' mode" +Note that this is an experimental feature which may be changed or removed in a +future Perl release. +.PP +When \f(CW\*(C`use re \*(Aqstrict\*(Aq\*(C'\fR is in effect, stricter checks are applied than +otherwise when compiling regular expression patterns. These may cause more +warnings to be raised than otherwise, and more things to be fatal instead of +just warnings. The purpose of this is to find and report at compile time some +things, which may be legal, but have a reasonable possibility of not being the +programmer's actual intent. This automatically turns on the \f(CW"regexp"\fR +warnings category (if not already on) within its scope. 
+.PP +An example of something that is caught under \f(CW\*(Aqstrict\*(Aq\fR, but not +otherwise, is the pattern +.PP +.Vb 1 +\& qr/\exABC/ +.Ve +.PP +The \f(CW"\ex"\fR construct without curly braces should be followed by exactly two +hex digits; this one is followed by three. This currently evaluates as +equivalent to +.PP +.Vb 1 +\& qr/\ex{AB}C/ +.Ve +.PP +that is, the character whose code point value is \f(CW0xAB\fR, followed by the +letter \f(CW\*(C`C\*(C'\fR. But since \f(CW\*(C`C\*(C'\fR is a hex digit, there is a reasonable chance +that the intent was +.PP +.Vb 1 +\& qr/\ex{ABC}/ +.Ve +.PP +that is, the single character at \f(CW0xABC\fR. Under \f(CW\*(Aqstrict\*(Aq\fR it is an error to +not follow \f(CW\*(C`\ex\*(C'\fR with exactly two hex digits. When not under \f(CW\*(Aqstrict\*(Aq\fR a +warning is generated if there is only one hex digit, and no warning is raised +if there are more than two. +.PP +It is expected that what exactly \f(CW\*(Aqstrict\*(Aq\fR does will evolve over time as we +gain experience with it. This means that programs that compile under it in +today's Perl may not compile, or may have more or fewer warnings, in future +Perls. There are no backwards compatibility promises with regard to it. Also +there are already proposals for an alternate syntax for enabling it. For +these reasons, using it will raise a \f(CW\*(C`experimental::re_strict\*(C'\fR class warning, +unless that category is turned off. +.PP +Note that if a pattern compiled within \f(CW\*(Aqstrict\*(Aq\fR is recompiled, say by +interpolating into another pattern, outside of \f(CW\*(Aqstrict\*(Aq\fR, it is not checked +again for strictness. This is because if it works under strict it must work +under non-strict. +.SS "'/flags' mode" +.IX Subsection "'/flags' mode" +When \f(CW\*(C`use re \*(Aq/\fR\f(CIflags\fR\f(CW\*(Aq\*(C'\fR is specified, the given \fIflags\fR are automatically +added to every regular expression till the end of the lexical scope. 
+\&\fIflags\fR can be any combination of +\&\f(CW\*(Aqa\*(Aq\fR, +\&\f(CW\*(Aqaa\*(Aq\fR, +\&\f(CW\*(Aqd\*(Aq\fR, +\&\f(CW\*(Aqi\*(Aq\fR, +\&\f(CW\*(Aql\*(Aq\fR, +\&\f(CW\*(Aqm\*(Aq\fR, +\&\f(CW\*(Aqn\*(Aq\fR, +\&\f(CW\*(Aqp\*(Aq\fR, +\&\f(CW\*(Aqs\*(Aq\fR, +\&\f(CW\*(Aqu\*(Aq\fR, +\&\f(CW\*(Aqx\*(Aq\fR, +and/or +\&\f(CW\*(Aqxx\*(Aq\fR. +.PP +\&\f(CW\*(C`no re \*(Aq/\fR\f(CIflags\fR\f(CW\*(Aq\*(C'\fR will turn off the effect of \f(CW\*(C`use re \*(Aq/\fR\f(CIflags\fR\f(CW\*(Aq\*(C'\fR for the +given flags. +.PP +For example, if you want all your regular expressions to have /msxx on by +default, simply put +.PP +.Vb 1 +\& use re \*(Aq/msxx\*(Aq; +.Ve +.PP +at the top of your code. +.PP +The character set \f(CW\*(C`/adul\*(C'\fR flags cancel each other out. So, in this example, +.PP +.Vb 4 +\& use re "/u"; +\& "ss" =~ /\exdf/; +\& use re "/d"; +\& "ss" =~ /\exdf/; +.Ve +.PP +the second \f(CW\*(C`use re\*(C'\fR does an implicit \f(CW\*(C`no re \*(Aq/u\*(Aq\*(C'\fR. +.PP +Similarly, +.PP +.Vb 4 +\& use re "/xx"; # Doubled\-x +\& ... +\& use re "/x"; # Single x from here on +\& ... +.Ve +.PP +Turning on one of the character set flags with \f(CW\*(C`use re\*(C'\fR takes precedence over the +\&\f(CW\*(C`locale\*(C'\fR pragma and the 'unicode_strings' \f(CW\*(C`feature\*(C'\fR, for regular +expressions. Turning off one of these flags when it is active reverts to +the behaviour specified by whatever other pragmata are in scope. For +example: +.PP +.Vb 4 +\& use feature "unicode_strings"; +\& no re "/u"; # does nothing +\& use re "/l"; +\& no re "/l"; # reverts to unicode_strings behaviour +.Ve +.SS "'debug' mode" +.IX Subsection "'debug' mode" +When \f(CW\*(C`use re \*(Aqdebug\*(Aq\*(C'\fR is in effect, perl emits debugging messages when +compiling and using regular expressions. The output is the same as that +obtained by running a \f(CW\*(C`\-DDEBUGGING\*(C'\fR\-enabled perl interpreter with the +\&\fB\-Dr\fR switch. 
It may be quite voluminous depending on the complexity +of the match. Using \f(CW\*(C`debugcolor\*(C'\fR instead of \f(CW\*(C`debug\*(C'\fR enables a +form of output that can be used to get a colorful display on terminals +that understand termcap color sequences. Set \f(CW$ENV{PERL_RE_TC}\fR to a +comma-separated list of \f(CW\*(C`termcap\*(C'\fR properties to use for highlighting +strings on/off, pre-point part on/off. +See "Debugging Regular Expressions" in perldebug for additional info. +.PP +\&\fBNOTE\fR that the exact format of the \f(CW\*(C`debug\*(C'\fR mode is \fBNOT\fR considered +to be an officially supported API of Perl. It is intended for debugging +only and may change as the core development team deems appropriate +without notice or deprecation in any release of Perl, major or minor. +Any documentation of the output is purely advisory. +.PP +As of 5.9.5 the directive \f(CW\*(C`use re \*(Aqdebug\*(Aq\*(C'\fR and its equivalents are +lexically scoped, as the other directives are. However they have both +compile-time and run-time effects. +.PP +See "Pragmatic Modules" in perlmodlib. +.SS "'Debug' mode" +.IX Subsection "'Debug' mode" +Similarly \f(CW\*(C`use re \*(AqDebug\*(Aq\*(C'\fR produces debugging output, the difference +being that it allows the fine tuning of what debugging output will be +emitted. Options are divided into three groups, those related to +compilation, those related to execution and those related to special +purposes. +.PP +\&\fBNOTE\fR that the options provided under the \f(CW\*(C`Debug\*(C'\fR mode and the exact +format of the output they create is \fBNOT\fR considered to be an +officially supported API of Perl. It is intended for debugging only and +may change as the core development team deems appropriate without notice +or deprecation in any release of Perl, major or minor. Any documentation +of the format or options available is advisory only and is subject to +change without notice. 
+.PP +The options are as follows: +.IP "Compile related options" 4 +.IX Item "Compile related options" +.RS 4 +.PD 0 +.IP COMPILE 4 +.IX Item "COMPILE" +.PD +Turns on all non-extra compile related debug options. +.IP PARSE 4 +.IX Item "PARSE" +Turns on debug output related to the process of parsing the pattern. +.IP OPTIMISE 4 +.IX Item "OPTIMISE" +Enables output related to the optimisation phase of compilation. +.IP TRIEC 4 +.IX Item "TRIEC" +Detailed info about trie compilation. +.IP DUMP 4 +.IX Item "DUMP" +Dump the final program out after it is compiled and optimised. +.IP FLAGS 4 +.IX Item "FLAGS" +Dump the flags associated with the program. +.IP TEST 4 +.IX Item "TEST" +Print output intended for testing the internals of the compile process. +.RE +.RS 4 +.RE +.IP "Execute related options" 4 +.IX Item "Execute related options" +.RS 4 +.PD 0 +.IP EXECUTE 4 +.IX Item "EXECUTE" +.PD +Turns on all non-extra execute related debug options. +.IP MATCH 4 +.IX Item "MATCH" +Turns on debugging of the main matching loop. +.IP TRIEE 4 +.IX Item "TRIEE" +Extra debugging of how tries execute. +.IP INTUIT 4 +.IX Item "INTUIT" +Enable debugging of start-point optimisations. +.RE +.RS 4 +.RE +.IP "Extra debugging options" 4 +.IX Item "Extra debugging options" +.RS 4 +.PD 0 +.IP EXTRA 4 +.IX Item "EXTRA" +.PD +Turns on all "extra" debugging options. +.IP BUFFERS 4 +.IX Item "BUFFERS" +Enable debugging the capture group storage during match. Warning, +this can potentially produce extremely large output. +.IP TRIEM 4 +.IX Item "TRIEM" +Enable enhanced TRIE debugging. Enhances both TRIEE +and TRIEC. +.IP STATE 4 +.IX Item "STATE" +Enable debugging of states in the engine. +.IP STACK 4 +.IX Item "STACK" +Enable debugging of the recursion stack in the engine. Enabling +or disabling this option automatically does the same for debugging +states as well. The output from this can be quite large. +.IP GPOS 4 +.IX Item "GPOS" +Enable debugging of the \eG modifier. 
+.IP OPTIMISEM 4 +.IX Item "OPTIMISEM" +Enable enhanced optimisation debugging and start-point optimisations. +Probably not useful except when debugging the regexp engine itself. +.IP DUMP_PRE_OPTIMIZE 4 +.IX Item "DUMP_PRE_OPTIMIZE" +Enable the dumping of the compiled pattern before the optimization phase. +.IP WILDCARD 4 +.IX Item "WILDCARD" +When Perl encounters a wildcard subpattern, (see "Wildcards in +Property Values" in perlunicode), it suspends compilation of the main pattern, compiles the +subpattern, and then matches that against all legal possibilities to determine +the actual code points the subpattern matches. After that it adds these to +the main pattern, and continues its compilation. +.Sp +You may very well want to see how your subpattern gets compiled, but it is +likely of less use to you to see how Perl matches that against all the legal +possibilities, as that is under control of Perl, not you. Therefore, the +debugging information of the compilation portion is as specified by the other +options, but the debugging output of the matching portion is normally +suppressed. +.Sp +You can use the WILDCARD option to enable the debugging output of this +subpattern matching. Careful! This can lead to voluminous outputs, and it +may not make much sense to you what and why Perl is doing what it is. +But it may be helpful to you to see why things aren't going the way you +expect. +.Sp +Note that this option alone doesn't cause any debugging information to be +output. What it does is stop the normal suppression of execution-related +debugging information during the matching portion of the compilation of +wildcards. You also have to specify which execution debugging information you +want, such as by also including the EXECUTE option. +.RE +.RS 4 +.RE +.IP "Other useful flags" 4 +.IX Item "Other useful flags" +These are useful shortcuts to save on the typing. 
+.RS 4 +.IP ALL 4 +.IX Item "ALL" +Enable all options at once except BUFFERS, WILDCARD, and DUMP_PRE_OPTIMIZE. +(To get every single option without exception, use both ALL and EXTRA, or +starting in 5.30 on a \f(CW\*(C`\-DDEBUGGING\*(C'\fR\-enabled perl interpreter, use +the \fB\-Drv\fR command-line switches.) +.IP All 4 +.IX Item "All" +Enable DUMP and all non-extra execute options. Equivalent to: +.Sp +.Vb 1 +\& use re \*(Aqdebug\*(Aq; +.Ve +.IP MORE 4 +.IX Item "MORE" +.PD 0 +.IP More 4 +.IX Item "More" +.PD +Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM. +.RE +.RS 4 +.RE +.PP +As of 5.9.5 the directive \f(CW\*(C`use re \*(Aqdebug\*(Aq\*(C'\fR and its equivalents are +lexically scoped, as are the other directives. However they have both +compile-time and run-time effects. +.SS "Exportable Functions" +.IX Subsection "Exportable Functions" +As of perl 5.9.5 're' debug contains a number of utility functions that +may be optionally exported into the caller's namespace. They are listed +below. +.IP is_regexp($ref) 4 +.IX Item "is_regexp($ref)" +Returns true if the argument is a compiled regular expression as returned +by \f(CW\*(C`qr//\*(C'\fR, false if it is not. +.Sp +This function will not be confused by overloading or blessing. In +internals terms, this extracts the regexp pointer out of the +PERL_MAGIC_qr structure so it cannot be fooled. +.IP regexp_pattern($ref) 4 +.IX Item "regexp_pattern($ref)" +If the argument is a compiled regular expression as returned by \f(CW\*(C`qr//\*(C'\fR, +then this function returns the pattern. +.Sp +In list context it returns a two element list, the first element +containing the pattern and the second containing the modifiers used when +the pattern was compiled. +.Sp +.Vb 1 +\& my ($pat, $mods) = regexp_pattern($ref); +.Ve +.Sp +In scalar context it returns the same as perl would when stringifying a raw +\&\f(CW\*(C`qr//\*(C'\fR with the same pattern inside. 
If the argument is not a compiled +reference then this routine returns false but defined in scalar context, +and the empty list in list context. Thus the following +.Sp +.Vb 1 +\& if (regexp_pattern($ref) eq \*(Aq(?^i:foo)\*(Aq) +.Ve +.Sp +will be warning free regardless of what \f(CW$ref\fR actually is. +.Sp +Like \f(CW\*(C`is_regexp\*(C'\fR this function will not be confused by overloading +or blessing of the object. +.IP regname($name,$all) 4 +.IX Item "regname($name,$all)" +Returns the contents of a named buffer of the last successful match. If +\&\f(CW$all\fR is true, then returns an array ref containing one entry per buffer, +otherwise returns the first defined buffer. +.IP regnames($all) 4 +.IX Item "regnames($all)" +Returns a list of all of the named buffers defined in the last successful +match. If \f(CW$all\fR is true, then it returns all names defined, if not it returns +only names which were involved in the match. +.IP \fBregnames_count()\fR 4 +.IX Item "regnames_count()" +Returns the number of distinct names defined in the pattern used +for the last successful match. +.Sp +\&\fBNote:\fR this result is always the actual number of distinct +named buffers defined, it may not actually match that which is +returned by \f(CWregnames()\fR and related routines when those routines +have not been called with the \f(CW$all\fR parameter set. +.IP regmust($ref) 4 +.IX Item "regmust($ref)" +If the argument is a compiled regular expression as returned by \f(CW\*(C`qr//\*(C'\fR, +then this function returns what the optimiser considers to be the longest +anchored fixed string and longest floating fixed string in the pattern. +.Sp +A \fIfixed string\fR is defined as being a substring that must appear for the +pattern to match. An \fIanchored fixed string\fR is a fixed string that must +appear at a particular offset from the beginning of the match. 
A \fIfloating +fixed string\fR is defined as a fixed string that can appear at any point in +a range of positions relative to the start of the match. For example, +.Sp +.Vb 3 +\& my $qr = qr/here .* there/x; +\& my ($anchored, $floating) = regmust($qr); +\& print "anchored:\*(Aq$anchored\*(Aq\enfloating:\*(Aq$floating\*(Aq\en"; +.Ve +.Sp +results in +.Sp +.Vb 2 +\& anchored:\*(Aqhere\*(Aq +\& floating:\*(Aqthere\*(Aq +.Ve +.Sp +Because the \f(CW\*(C`here\*(C'\fR is before the \f(CW\*(C`.*\*(C'\fR in the pattern, its position +can be determined exactly. That's not true, however, for the \f(CW\*(C`there\*(C'\fR; +it could appear at any point after where the anchored string appeared. +Perl uses both for its optimisations, preferring the longer, or, if they are +equal, the floating. +.Sp +\&\fBNOTE:\fR This may not necessarily be the definitive longest anchored and +floating string. This will be what the optimiser of the Perl that you +are using thinks is the longest. If you believe that the result is wrong +please report it via the perlbug utility. +.IP optimization($ref) 4 +.IX Item "optimization($ref)" +If the argument is a compiled regular expression as returned by \f(CW\*(C`qr//\*(C'\fR, +then this function returns a hashref of the optimization information +discovered at compile time, so we can write tests around it. If any +other argument is given, returns \f(CW\*(C`undef\*(C'\fR. +.Sp +The hash contents are expected to change from time to time as we develop +new ways to optimize \- no assumption of stability should be made, not +even between minor versions of perl. +.Sp +For the current version, the hash will have the following contents: +.RS 4 +.IP minlen 4 +.IX Item "minlen" +An integer, the least number of characters in any string that can match. +.IP minlenret 4 +.IX Item "minlenret" +An integer, the least number of characters that can be in \f(CW$&\fR after a +match. (Consider eg \f(CW\*(C` /ns(?=\ed)/ \*(C'\fR.) 
+.IP gofs 4 +.IX Item "gofs" +An integer, the number of characters before \f(CWpos()\fR to start match at. +.IP noscan 4 +.IX Item "noscan" +A boolean, \f(CW\*(C`TRUE\*(C'\fR to indicate that any anchored/floating substrings +found should not be used. (CHECKME: apparently this is set for an +anchored pattern with no floating substring, but never used.) +.IP isall 4 +.IX Item "isall" +A boolean, \f(CW\*(C`TRUE\*(C'\fR to indicate that the optimizer information is all +that the regular expression contains, and thus one does not need to +enter the regexp runtime engine at all. +.IP "anchor SBOL" 4 +.IX Item "anchor SBOL" +A boolean, \f(CW\*(C`TRUE\*(C'\fR if the pattern is anchored to start of string. +.IP "anchor MBOL" 4 +.IX Item "anchor MBOL" +A boolean, \f(CW\*(C`TRUE\*(C'\fR if the pattern is anchored to any start of line +within the string. +.IP "anchor GPOS" 4 +.IX Item "anchor GPOS" +A boolean, \f(CW\*(C`TRUE\*(C'\fR if the pattern is anchored to the end of the previous +match. +.IP skip 4 +.IX Item "skip" +A boolean, \f(CW\*(C`TRUE\*(C'\fR if the start class can match only the first of a run. +.IP implicit 4 +.IX Item "implicit" +A boolean, \f(CW\*(C`TRUE\*(C'\fR if a \f(CW\*(C`/.*/\*(C'\fR has been turned implicitly into a \f(CW\*(C`/^.*/\*(C'\fR. +.IP anchored/floating 4 +.IX Item "anchored/floating" +A byte string representing an anchored or floating substring respectively +that any match must contain, or undef if no such substring was found, or +if the substring would require utf8 to represent. +.IP "anchored utf8/floating utf8" 4 +.IX Item "anchored utf8/floating utf8" +A utf8 string representing an anchored or floating substring respectively +that any match must contain, or undef if no such substring was found, or +if the substring contains only 7\-bit ASCII characters. 
+.IP "anchored min offset/floating min offset" 4 +.IX Item "anchored min offset/floating min offset" +An integer, the first offset in characters from a match location at which +we should look for the corresponding substring. +.IP "anchored max offset/floating max offset" 4 +.IX Item "anchored max offset/floating max offset" +An integer, the last offset in characters from a match location at which +we should look for the corresponding substring. +.Sp +Ignored for anchored, so may be 0 or same as min. +.IP "anchored end shift/floating end shift" 4 +.IX Item "anchored end shift/floating end shift" +FIXME: not sure what this is, something to do with lookbehind. regcomp.c +says: + When the final pattern is compiled and the data is moved from the + scan_data_t structure into the regexp structure the information + about lookbehind is factored in, with the information that would + have been lost precalculated in the end_shift field for the + associated string. +.IP checking 4 +.IX Item "checking" +A constant string, one of "anchored", "floating" or "none" to indicate +which substring (if any) should be checked for first. +.IP stclass 4 +.IX Item "stclass" +A string representation of a character class ("start class") that must +be the first character of any match. +.Sp +TODO: explain the representations. +.RE +.RS 4 +.RE +.SH "SEE ALSO" +.IX Header "SEE ALSO" +"Pragmatic Modules" in perlmodlib. |