summaryrefslogtreecommitdiffstats
path: root/upstream/mageia-cauldron/man3pm/re.3pm
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 19:43:11 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 19:43:11 +0000
commitfc22b3d6507c6745911b9dfcc68f1e665ae13dbc (patch)
treece1e3bce06471410239a6f41282e328770aa404a /upstream/mageia-cauldron/man3pm/re.3pm
parentInitial commit. (diff)
downloadmanpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.tar.xz
manpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.zip
Adding upstream version 4.22.0.upstream/4.22.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'upstream/mageia-cauldron/man3pm/re.3pm')
-rw-r--r--upstream/mageia-cauldron/man3pm/re.3pm635
1 files changed, 635 insertions, 0 deletions
diff --git a/upstream/mageia-cauldron/man3pm/re.3pm b/upstream/mageia-cauldron/man3pm/re.3pm
new file mode 100644
index 00000000..d3baa4ab
--- /dev/null
+++ b/upstream/mageia-cauldron/man3pm/re.3pm
@@ -0,0 +1,635 @@
+.\" -*- mode: troff; coding: utf-8 -*-
+.\" Automatically generated by Pod::Man 5.01 (Pod::Simple 3.43)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>.
+.ie n \{\
+. ds C` ""
+. ds C' ""
+'br\}
+.el\{\
+. ds C`
+. ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD. Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.nr rF 0
+.if \n(.g .if rF .nr rF 1
+.if (\n(rF:(\n(.g==0)) \{\
+. if \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+..
+. if !\nF==2 \{\
+. nr % 0
+. nr F 2
+. \}
+. \}
+.\}
+.rr rF
+.\" ========================================================================
+.\"
+.IX Title "re 3pm"
+.TH re 3pm 2023-11-28 "perl v5.38.2" "Perl Programmers Reference Guide"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH NAME
+re \- Perl pragma to alter regular expression behaviour
+.SH SYNOPSIS
+.IX Header "SYNOPSIS"
+.Vb 2
+\& use re \*(Aqtaint\*(Aq;
+\& ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
+\&
+\& $pat = \*(Aq(?{ $foo = 1 })\*(Aq;
+\& use re \*(Aqeval\*(Aq;
+\& /foo${pat}bar/; # won\*(Aqt fail (when not under \-T
+\& # switch)
+\&
+\& {
+\& no re \*(Aqtaint\*(Aq; # the default
+\& ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
+\&
+\& no re \*(Aqeval\*(Aq; # the default
+\& /foo${pat}bar/; # disallowed (with or without \-T
+\& # switch)
+\& }
+\&
+\& use re \*(Aqstrict\*(Aq; # Raise warnings for more conditions
+\&
+\& use re \*(Aq/ix\*(Aq;
+\& "FOO" =~ / foo /; # /ix implied
+\& no re \*(Aq/x\*(Aq;
+\& "FOO" =~ /foo/; # just /i implied
+\&
+\& use re \*(Aqdebug\*(Aq; # output debugging info during
+\& /^(.*)$/s; # compile and run time
+\&
+\&
+\& use re \*(Aqdebugcolor\*(Aq; # same as \*(Aqdebug\*(Aq, but with colored
+\& # output
+\& ...
+\&
+\& use re qw(Debug All); # Same as "use re \*(Aqdebug\*(Aq", but you
+\& # can use "Debug" with things other
+\& # than \*(AqAll\*(Aq
+\& use re qw(Debug More); # \*(AqAll\*(Aq plus output more details
+\& no re qw(Debug ALL); # Turn off (almost) all re debugging
+\& # in this scope
+\&
+\& use re qw(is_regexp regexp_pattern); # import utility functions
+\& my ($pat,$mods)=regexp_pattern(qr/foo/i);
+\& if (is_regexp($obj)) {
+\& print "Got regexp: ",
+\& scalar regexp_pattern($obj); # just as perl would stringify
+\& } # it but no hassle with blessed
+\& # re\*(Aqs.
+.Ve
+.PP
+(We use $^X in these examples because it's tainted by default.)
+.SH DESCRIPTION
+.IX Header "DESCRIPTION"
+.SS "'taint' mode"
+.IX Subsection "'taint' mode"
+When \f(CW\*(C`use re \*(Aqtaint\*(Aq\*(C'\fR is in effect, and a tainted string is the target
+of a regexp, the regexp memories (or values returned by the m// operator
+in list context) are tainted. This feature is useful when regexp operations
+on tainted data aren't meant to extract safe substrings, but to perform
+other transformations.
+.SS "'eval' mode"
+.IX Subsection "'eval' mode"
+When \f(CW\*(C`use re \*(Aqeval\*(Aq\*(C'\fR is in effect, a regexp is allowed to contain
+\&\f(CW\*(C`(?{ ... })\*(C'\fR zero-width assertions and \f(CW\*(C`(??{ ... })\*(C'\fR postponed
+subexpressions that are derived from variable interpolation, rather than
+appearing literally within the regexp. That is normally disallowed, since
+it is a
+potential security risk. Note that this pragma is ignored when the regular
+expression is obtained from tainted data, i.e. evaluation is always
+disallowed with tainted regular expressions. See "(?{ code })" in perlre
+and "(??{ code })" in perlre.
+.PP
+For the purpose of this pragma, interpolation of precompiled regular
+expressions (i.e., the result of \f(CW\*(C`qr//\*(C'\fR) is \fInot\fR considered variable
+interpolation. Thus:
+.PP
+.Vb 1
+\& /foo${pat}bar/
+.Ve
+.PP
+\&\fIis\fR allowed if \f(CW$pat\fR is a precompiled regular expression, even
+if \f(CW$pat\fR contains \f(CW\*(C`(?{ ... })\*(C'\fR assertions or \f(CW\*(C`(??{ ... })\*(C'\fR subexpressions.
+.SS "'strict' mode"
+.IX Subsection "'strict' mode"
+Note that this is an experimental feature which may be changed or removed in a
+future Perl release.
+.PP
+When \f(CW\*(C`use re \*(Aqstrict\*(Aq\*(C'\fR is in effect, stricter checks are applied than
+otherwise when compiling regular expression patterns. These may cause more
+warnings to be raised than otherwise, and more things to be fatal instead of
+just warnings. The purpose of this is to find and report at compile time some
+things, which may be legal, but have a reasonable possibility of not being the
+programmer's actual intent. This automatically turns on the \f(CW"regexp"\fR
+warnings category (if not already on) within its scope.
+.PP
+An example of something that is caught under \f(CW\*(Aqstrict\*(Aq\fR, but not
+otherwise, is the pattern
+.PP
+.Vb 1
+\& qr/\exABC/
+.Ve
+.PP
+The \f(CW"\ex"\fR construct without curly braces should be followed by exactly two
+hex digits; this one is followed by three. This currently evaluates as
+equivalent to
+.PP
+.Vb 1
+\& qr/\ex{AB}C/
+.Ve
+.PP
+that is, the character whose code point value is \f(CW0xAB\fR, followed by the
+letter \f(CW\*(C`C\*(C'\fR. But since \f(CW\*(C`C\*(C'\fR is a hex digit, there is a reasonable chance
+that the intent was
+.PP
+.Vb 1
+\& qr/\ex{ABC}/
+.Ve
+.PP
+that is, the single character at \f(CW0xABC\fR. Under \f(CW\*(Aqstrict\*(Aq\fR it is an error to
+not follow \f(CW\*(C`\ex\*(C'\fR with exactly two hex digits. When not under \f(CW\*(Aqstrict\*(Aq\fR a
+warning is generated if there is only one hex digit, and no warning is raised
+if there are more than two.
+.PP
+It is expected that what exactly \f(CW\*(Aqstrict\*(Aq\fR does will evolve over time as we
+gain experience with it. This means that programs that compile under it in
+today's Perl may not compile, or may have more or fewer warnings, in future
+Perls. There are no backwards compatibility promises with regard to it. Also
+there are already proposals for an alternate syntax for enabling it. For
+these reasons, using it will raise a \f(CW\*(C`experimental::re_strict\*(C'\fR class warning,
+unless that category is turned off.
+.PP
+Note that if a pattern compiled within \f(CW\*(Aqstrict\*(Aq\fR is recompiled, say by
+interpolating into another pattern, outside of \f(CW\*(Aqstrict\*(Aq\fR, it is not checked
+again for strictness. This is because if it works under strict it must work
+under non-strict.
+.SS "'/flags' mode"
+.IX Subsection "'/flags' mode"
+When \f(CW\*(C`use re \*(Aq/\fR\f(CIflags\fR\f(CW\*(Aq\*(C'\fR is specified, the given \fIflags\fR are automatically
+added to every regular expression till the end of the lexical scope.
+\&\fIflags\fR can be any combination of
+\&\f(CW\*(Aqa\*(Aq\fR,
+\&\f(CW\*(Aqaa\*(Aq\fR,
+\&\f(CW\*(Aqd\*(Aq\fR,
+\&\f(CW\*(Aqi\*(Aq\fR,
+\&\f(CW\*(Aql\*(Aq\fR,
+\&\f(CW\*(Aqm\*(Aq\fR,
+\&\f(CW\*(Aqn\*(Aq\fR,
+\&\f(CW\*(Aqp\*(Aq\fR,
+\&\f(CW\*(Aqs\*(Aq\fR,
+\&\f(CW\*(Aqu\*(Aq\fR,
+\&\f(CW\*(Aqx\*(Aq\fR,
+and/or
+\&\f(CW\*(Aqxx\*(Aq\fR.
+.PP
+\&\f(CW\*(C`no re \*(Aq/\fR\f(CIflags\fR\f(CW\*(Aq\*(C'\fR will turn off the effect of \f(CW\*(C`use re \*(Aq/\fR\f(CIflags\fR\f(CW\*(Aq\*(C'\fR for the
+given flags.
+.PP
+For example, if you want all your regular expressions to have /msxx on by
+default, simply put
+.PP
+.Vb 1
+\& use re \*(Aq/msxx\*(Aq;
+.Ve
+.PP
+at the top of your code.
+.PP
+The character set \f(CW\*(C`/adul\*(C'\fR flags cancel each other out. So, in this example,
+.PP
+.Vb 4
+\& use re "/u";
+\& "ss" =~ /\exdf/;
+\& use re "/d";
+\& "ss" =~ /\exdf/;
+.Ve
+.PP
+the second \f(CW\*(C`use re\*(C'\fR does an implicit \f(CW\*(C`no re \*(Aq/u\*(Aq\*(C'\fR.
+.PP
+Similarly,
+.PP
+.Vb 4
+\& use re "/xx"; # Doubled\-x
+\& ...
+\& use re "/x"; # Single x from here on
+\& ...
+.Ve
+.PP
+Turning on one of the character set flags with \f(CW\*(C`use re\*(C'\fR takes precedence over the
+\&\f(CW\*(C`locale\*(C'\fR pragma and the 'unicode_strings' \f(CW\*(C`feature\*(C'\fR, for regular
+expressions. Turning off one of these flags when it is active reverts to
+the behaviour specified by whatever other pragmata are in scope. For
+example:
+.PP
+.Vb 4
+\& use feature "unicode_strings";
+\& no re "/u"; # does nothing
+\& use re "/l";
+\& no re "/l"; # reverts to unicode_strings behaviour
+.Ve
+.SS "'debug' mode"
+.IX Subsection "'debug' mode"
+When \f(CW\*(C`use re \*(Aqdebug\*(Aq\*(C'\fR is in effect, perl emits debugging messages when
+compiling and using regular expressions. The output is the same as that
+obtained by running a \f(CW\*(C`\-DDEBUGGING\*(C'\fR\-enabled perl interpreter with the
+\&\fB\-Dr\fR switch. It may be quite voluminous depending on the complexity
+of the match. Using \f(CW\*(C`debugcolor\*(C'\fR instead of \f(CW\*(C`debug\*(C'\fR enables a
+form of output that can be used to get a colorful display on terminals
+that understand termcap color sequences. Set \f(CW$ENV{PERL_RE_TC}\fR to a
+comma-separated list of \f(CW\*(C`termcap\*(C'\fR properties to use for highlighting
+strings on/off, pre-point part on/off.
+See "Debugging Regular Expressions" in perldebug for additional info.
+.PP
+\&\fBNOTE\fR that the exact format of the \f(CW\*(C`debug\*(C'\fR mode is \fBNOT\fR considered
+to be an officially supported API of Perl. It is intended for debugging
+only and may change as the core development team deems appropriate
+without notice or deprecation in any release of Perl, major or minor.
+Any documentation of the output is purely advisory.
+.PP
+As of 5.9.5 the directive \f(CW\*(C`use re \*(Aqdebug\*(Aq\*(C'\fR and its equivalents are
+lexically scoped, as the other directives are. However they have both
+compile-time and run-time effects.
+.PP
+See "Pragmatic Modules" in perlmodlib.
+.SS "'Debug' mode"
+.IX Subsection "'Debug' mode"
+Similarly \f(CW\*(C`use re \*(AqDebug\*(Aq\*(C'\fR produces debugging output, the difference
+being that it allows the fine tuning of what debugging output will be
+emitted. Options are divided into three groups, those related to
+compilation, those related to execution and those related to special
+purposes.
+.PP
+\&\fBNOTE\fR that the options provided under the \f(CW\*(C`Debug\*(C'\fR mode and the exact
+format of the output they create is \fBNOT\fR considered to be an
+officially supported API of Perl. It is intended for debugging only and
+may change as the core development team deems appropriate without notice
+or deprecation in any release of Perl, major or minor. Any documentation
+of the format or options available is advisory only and is subject to
+change without notice.
+.PP
+The options are as follows:
+.IP "Compile related options" 4
+.IX Item "Compile related options"
+.RS 4
+.PD 0
+.IP COMPILE 4
+.IX Item "COMPILE"
+.PD
+Turns on all non-extra compile related debug options.
+.IP PARSE 4
+.IX Item "PARSE"
+Turns on debug output related to the process of parsing the pattern.
+.IP OPTIMISE 4
+.IX Item "OPTIMISE"
+Enables output related to the optimisation phase of compilation.
+.IP TRIEC 4
+.IX Item "TRIEC"
+Detailed info about trie compilation.
+.IP DUMP 4
+.IX Item "DUMP"
+Dump the final program out after it is compiled and optimised.
+.IP FLAGS 4
+.IX Item "FLAGS"
+Dump the flags associated with the program.
+.IP TEST 4
+.IX Item "TEST"
+Print output intended for testing the internals of the compile process.
+.RE
+.RS 4
+.RE
+.IP "Execute related options" 4
+.IX Item "Execute related options"
+.RS 4
+.PD 0
+.IP EXECUTE 4
+.IX Item "EXECUTE"
+.PD
+Turns on all non-extra execute related debug options.
+.IP MATCH 4
+.IX Item "MATCH"
+Turns on debugging of the main matching loop.
+.IP TRIEE 4
+.IX Item "TRIEE"
+Extra debugging of how tries execute.
+.IP INTUIT 4
+.IX Item "INTUIT"
+Enable debugging of start-point optimisations.
+.RE
+.RS 4
+.RE
+.IP "Extra debugging options" 4
+.IX Item "Extra debugging options"
+.RS 4
+.PD 0
+.IP EXTRA 4
+.IX Item "EXTRA"
+.PD
+Turns on all "extra" debugging options.
+.IP BUFFERS 4
+.IX Item "BUFFERS"
+Enable debugging the capture group storage during match. Warning,
+this can potentially produce extremely large output.
+.IP TRIEM 4
+.IX Item "TRIEM"
+Enable enhanced TRIE debugging. Enhances both TRIEE
+and TRIEC.
+.IP STATE 4
+.IX Item "STATE"
+Enable debugging of states in the engine.
+.IP STACK 4
+.IX Item "STACK"
+Enable debugging of the recursion stack in the engine. Enabling
+or disabling this option automatically does the same for debugging
+states as well. The output from this can be quite large.
+.IP GPOS 4
+.IX Item "GPOS"
+Enable debugging of the \eG modifier.
+.IP OPTIMISEM 4
+.IX Item "OPTIMISEM"
+Enable enhanced optimisation debugging and start-point optimisations.
+Probably not useful except when debugging the regexp engine itself.
+.IP DUMP_PRE_OPTIMIZE 4
+.IX Item "DUMP_PRE_OPTIMIZE"
+Enable the dumping of the compiled pattern before the optimization phase.
+.IP WILDCARD 4
+.IX Item "WILDCARD"
+When Perl encounters a wildcard subpattern, (see "Wildcards in
+Property Values" in perlunicode), it suspends compilation of the main pattern, compiles the
+subpattern, and then matches that against all legal possibilities to determine
+the actual code points the subpattern matches. After that it adds these to
+the main pattern, and continues its compilation.
+.Sp
+You may very well want to see how your subpattern gets compiled, but it is
+likely of less use to you to see how Perl matches that against all the legal
+possibilities, as that is under control of Perl, not you. Therefore, the
+debugging information of the compilation portion is as specified by the other
+options, but the debugging output of the matching portion is normally
+suppressed.
+.Sp
+You can use the WILDCARD option to enable the debugging output of this
+subpattern matching. Careful! This can lead to voluminous outputs, and it
+may not be obvious to you what Perl is doing, or why.
+But it may be helpful to you to see why things aren't going the way you
+expect.
+.Sp
+Note that this option alone doesn't cause any debugging information to be
+output. What it does is stop the normal suppression of execution-related
+debugging information during the matching portion of the compilation of
+wildcards. You also have to specify which execution debugging information you
+want, such as by also including the EXECUTE option.
+.RE
+.RS 4
+.RE
+.IP "Other useful flags" 4
+.IX Item "Other useful flags"
+These are useful shortcuts to save on the typing.
+.RS 4
+.IP ALL 4
+.IX Item "ALL"
+Enable all options at once except BUFFERS, WILDCARD, and DUMP_PRE_OPTIMIZE.
+(To get every single option without exception, use both ALL and EXTRA, or
+starting in 5.30 on a \f(CW\*(C`\-DDEBUGGING\*(C'\fR\-enabled perl interpreter, use
+the \fB\-Drv\fR command-line switches.)
+.IP All 4
+.IX Item "All"
+Enable DUMP and all non-extra execute options. Equivalent to:
+.Sp
+.Vb 1
+\& use re \*(Aqdebug\*(Aq;
+.Ve
+.IP MORE 4
+.IX Item "MORE"
+.PD 0
+.IP More 4
+.IX Item "More"
+.PD
+Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM.
+.RE
+.RS 4
+.RE
+.PP
+As of 5.9.5 the directive \f(CW\*(C`use re \*(Aqdebug\*(Aq\*(C'\fR and its equivalents are
+lexically scoped, as are the other directives. However they have both
+compile-time and run-time effects.
+.SS "Exportable Functions"
+.IX Subsection "Exportable Functions"
+As of perl 5.9.5 the 're' module contains a number of utility functions that
+may be optionally exported into the caller's namespace. They are listed
+below.
+.IP is_regexp($ref) 4
+.IX Item "is_regexp($ref)"
+Returns true if the argument is a compiled regular expression as returned
+by \f(CW\*(C`qr//\*(C'\fR, false if it is not.
+.Sp
+This function will not be confused by overloading or blessing. In
+internals terms, this extracts the regexp pointer out of the
+PERL_MAGIC_qr structure so it cannot be fooled.
+.IP regexp_pattern($ref) 4
+.IX Item "regexp_pattern($ref)"
+If the argument is a compiled regular expression as returned by \f(CW\*(C`qr//\*(C'\fR,
+then this function returns the pattern.
+.Sp
+In list context it returns a two element list, the first element
+containing the pattern and the second containing the modifiers used when
+the pattern was compiled.
+.Sp
+.Vb 1
+\& my ($pat, $mods) = regexp_pattern($ref);
+.Ve
+.Sp
+In scalar context it returns the same as perl would when stringifying a raw
+\&\f(CW\*(C`qr//\*(C'\fR with the same pattern inside. If the argument is not a compiled
+regular expression then this routine returns false but defined in scalar context,
+and the empty list in list context. Thus the following
+.Sp
+.Vb 1
+\& if (regexp_pattern($ref) eq \*(Aq(?^i:foo)\*(Aq)
+.Ve
+.Sp
+will be warning free regardless of what \f(CW$ref\fR actually is.
+.Sp
+Like \f(CW\*(C`is_regexp\*(C'\fR this function will not be confused by overloading
+or blessing of the object.
+.IP regname($name,$all) 4
+.IX Item "regname($name,$all)"
+Returns the contents of a named buffer of the last successful match. If
+\&\f(CW$all\fR is true, then returns an array ref containing one entry per buffer,
+otherwise returns the first defined buffer.
+.IP regnames($all) 4
+.IX Item "regnames($all)"
+Returns a list of all of the named buffers defined in the last successful
+match. If \f(CW$all\fR is true, then it returns all names defined, if not it returns
+only names which were involved in the match.
+.IP \fBregnames_count()\fR 4
+.IX Item "regnames_count()"
+Returns the number of distinct names defined in the pattern used
+for the last successful match.
+.Sp
+\&\fBNote:\fR this result is always the actual number of distinct
+named buffers defined, it may not actually match that which is
+returned by \f(CWregnames()\fR and related routines when those routines
+have not been called with the \f(CW$all\fR parameter set.
+.IP regmust($ref) 4
+.IX Item "regmust($ref)"
+If the argument is a compiled regular expression as returned by \f(CW\*(C`qr//\*(C'\fR,
+then this function returns what the optimiser considers to be the longest
+anchored fixed string and longest floating fixed string in the pattern.
+.Sp
+A \fIfixed string\fR is defined as being a substring that must appear for the
+pattern to match. An \fIanchored fixed string\fR is a fixed string that must
+appear at a particular offset from the beginning of the match. A \fIfloating
+fixed string\fR is defined as a fixed string that can appear at any point in
+a range of positions relative to the start of the match. For example,
+.Sp
+.Vb 3
+\& my $qr = qr/here .* there/x;
+\& my ($anchored, $floating) = regmust($qr);
+\& print "anchored:\*(Aq$anchored\*(Aq\enfloating:\*(Aq$floating\*(Aq\en";
+.Ve
+.Sp
+results in
+.Sp
+.Vb 2
+\& anchored:\*(Aqhere\*(Aq
+\& floating:\*(Aqthere\*(Aq
+.Ve
+.Sp
+Because the \f(CW\*(C`here\*(C'\fR is before the \f(CW\*(C`.*\*(C'\fR in the pattern, its position
+can be determined exactly. That's not true, however, for the \f(CW\*(C`there\*(C'\fR;
+it could appear at any point after where the anchored string appeared.
+Perl uses both for its optimisations, preferring the longer, or, if they are
+equal, the floating.
+.Sp
+\&\fBNOTE:\fR This may not necessarily be the definitive longest anchored and
+floating string. This will be what the optimiser of the Perl that you
+are using thinks is the longest. If you believe that the result is wrong
+please report it via the perlbug utility.
+.IP optimization($ref) 4
+.IX Item "optimization($ref)"
+If the argument is a compiled regular expression as returned by \f(CW\*(C`qr//\*(C'\fR,
+then this function returns a hashref of the optimization information
+discovered at compile time, so we can write tests around it. If any
+other argument is given, returns \f(CW\*(C`undef\*(C'\fR.
+.Sp
+The hash contents are expected to change from time to time as we develop
+new ways to optimize \- no assumption of stability should be made, not
+even between minor versions of perl.
+.Sp
+For the current version, the hash will have the following contents:
+.RS 4
+.IP minlen 4
+.IX Item "minlen"
+An integer, the least number of characters in any string that can match.
+.IP minlenret 4
+.IX Item "minlenret"
+An integer, the least number of characters that can be in \f(CW$&\fR after a
+match. (Consider e.g. \f(CW\*(C` /ns(?=\ed)/ \*(C'\fR.)
+.IP gofs 4
+.IX Item "gofs"
+An integer, the number of characters before \f(CWpos()\fR to start match at.
+.IP noscan 4
+.IX Item "noscan"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR to indicate that any anchored/floating substrings
+found should not be used. (CHECKME: apparently this is set for an
+anchored pattern with no floating substring, but never used.)
+.IP isall 4
+.IX Item "isall"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR to indicate that the optimizer information is all
+that the regular expression contains, and thus one does not need to
+enter the regexp runtime engine at all.
+.IP "anchor SBOL" 4
+.IX Item "anchor SBOL"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR if the pattern is anchored to start of string.
+.IP "anchor MBOL" 4
+.IX Item "anchor MBOL"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR if the pattern is anchored to any start of line
+within the string.
+.IP "anchor GPOS" 4
+.IX Item "anchor GPOS"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR if the pattern is anchored to the end of the previous
+match.
+.IP skip 4
+.IX Item "skip"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR if the start class can match only the first of a run.
+.IP implicit 4
+.IX Item "implicit"
+A boolean, \f(CW\*(C`TRUE\*(C'\fR if a \f(CW\*(C`/.*/\*(C'\fR has been turned implicitly into a \f(CW\*(C`/^.*/\*(C'\fR.
+.IP anchored/floating 4
+.IX Item "anchored/floating"
+A byte string representing an anchored or floating substring respectively
+that any match must contain, or undef if no such substring was found, or
+if the substring would require utf8 to represent.
+.IP "anchored utf8/floating utf8" 4
+.IX Item "anchored utf8/floating utf8"
+A utf8 string representing an anchored or floating substring respectively
+that any match must contain, or undef if no such substring was found, or
+if the substring contains only 7\-bit ASCII characters.
+.IP "anchored min offset/floating min offset" 4
+.IX Item "anchored min offset/floating min offset"
+An integer, the first offset in characters from a match location at which
+we should look for the corresponding substring.
+.IP "anchored max offset/floating max offset" 4
+.IX Item "anchored max offset/floating max offset"
+An integer, the last offset in characters from a match location at which
+we should look for the corresponding substring.
+.Sp
+Ignored for anchored, so may be 0 or same as min.
+.IP "anchored end shift/floating end shift" 4
+.IX Item "anchored end shift/floating end shift"
+FIXME: not sure what this is, something to do with lookbehind. regcomp.c
+says:
+ When the final pattern is compiled and the data is moved from the
+ scan_data_t structure into the regexp structure the information
+ about lookbehind is factored in, with the information that would
+ have been lost precalculated in the end_shift field for the
+ associated string.
+.IP checking 4
+.IX Item "checking"
+A constant string, one of "anchored", "floating" or "none" to indicate
+which substring (if any) should be checked for first.
+.IP stclass 4
+.IX Item "stclass"
+A string representation of a character class ("start class") that must
+be the first character of any match.
+.Sp
+TODO: explain the representations.
+.RE
+.RS 4
+.RE
+.SH "SEE ALSO"
+.IX Header "SEE ALSO"
+"Pragmatic Modules" in perlmodlib.