1 files changed, 1270 insertions, 0 deletions
diff --git a/upstream/archlinux/man1/perlfaq6.1perl b/upstream/archlinux/man1/perlfaq6.1perl
new file mode 100644
index 00000000..6d4e40cf
--- /dev/null
+++ b/upstream/archlinux/man1/perlfaq6.1perl
@@ -0,0 +1,1270 @@
+.\" -*- mode: troff; coding: utf-8 -*-
+.\" Automatically generated by Pod::Man 5.01 (Pod::Simple 3.43)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>.
+.ie n \{\
+.    ds C` ""
+.    ds C' ""
+'br\}
+.el\{\
+.    ds C`
+.    ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.nr rF 0
+.if \n(.g .if rF .nr rF 1
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
+.        de IX
+.        tm Index:\\$1\t\\n%\t"\\$2"
+..
+.        if !\nF==2 \{\
+.            nr % 0
+.            nr F 2
+.        \}
+.    \}
+.\}
+.rr rF
+.\" ========================================================================
+.\"
+.IX Title "PERLFAQ6 1perl"
+.TH PERLFAQ6 1perl 2024-02-11 "perl v5.38.2" "Perl Programmers Reference Guide"
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH NAME
+perlfaq6 \- Regular Expressions
+.SH VERSION
+.IX Header "VERSION"
+version 5.20210520
+.SH DESCRIPTION
+.IX Header "DESCRIPTION"
+This section is surprisingly small because the rest of the FAQ is
+littered with answers involving regular expressions. For example,
+decoding a URL and checking whether something is a number can be handled
+with regular expressions, but those answers are found elsewhere in
+this document (in perlfaq9: "How do I decode or create those %\-encodings
+on the web" and perlfaq4: "How do I determine whether a scalar is
+a number/whole/integer/float", to be precise).
+.SS "How can I hope to use regular expressions without creating illegible and unmaintainable code?"
+.IX Xref "regex, legibility regexp, legibility regular expression, legibility x"
+.IX Subsection "How can I hope to use regular expressions without creating illegible and unmaintainable code?"
+Three techniques can make regular expressions maintainable and
+understandable.
+.IP "Comments Outside the Regex" 4
+.IX Item "Comments Outside the Regex"
+Describe what you're doing and how you're doing it, using normal Perl
+comments.
+.Sp
+.Vb 3
+\&    # turn the line into the first word, a colon, and the
+\&    # number of characters on the rest of the line
+\&    s/^(\ew+)(.*)/ lc($1) . ":" . length($2) /meg;
+.Ve
+.IP "Comments Inside the Regex" 4
+.IX Item "Comments Inside the Regex"
+The \f(CW\*(C`/x\*(C'\fR modifier causes whitespace to be ignored in a regex pattern
+(except in a character class and a few other places), and also allows you to
+use normal comments there, too. As you can imagine, whitespace and comments
+help a lot.
+.Sp
+\&\f(CW\*(C`/x\*(C'\fR lets you turn this:
+.Sp
+.Vb 1
+\&    s{<(?:[^>\*(Aq"]*|".*?"|\*(Aq.*?\*(Aq)+>}{}gs;
+.Ve
+.Sp
+into this:
+.Sp
+.Vb 10
+\&    s{ <                    # opening angle bracket
+\&        (?:                 # Non\-backreffing grouping paren
+\&            [^>\*(Aq"] *        # 0 or more things that are neither > nor \*(Aq nor "
+\&                |           #    or else
+\&            ".*?"           # a section between double quotes (stingy match)
+\&                |           #    or else
+\&            \*(Aq.*?\*(Aq           # a section between single quotes (stingy match)
+\&        ) +                 #   all occurring one or more times
+\&        >                   # closing angle bracket
+\&    }{}gsx;                 # replace with nothing, i.e. delete
+.Ve
+.Sp
+It's still not quite so clear as prose, but it is very useful for
+describing the meaning of each part of the pattern.
+.IP "Different Delimiters" 4
+.IX Item "Different Delimiters"
+While we normally think of patterns as being delimited with \f(CW\*(C`/\*(C'\fR
+characters, they can be delimited by almost any character. perlre
+describes this. For example, the \f(CW\*(C`s///\*(C'\fR above uses braces as
+delimiters. Selecting another delimiter can avoid quoting the
+delimiter within the pattern:
+.Sp
+.Vb 2
+\&    s/\e/usr\e/local/\e/usr\e/share/g;    # bad delimiter choice
+\&    s#/usr/local#/usr/share#g;        # better
+.Ve
+.Sp
+Using logically paired delimiters can be even more readable:
+.Sp
+.Vb 1
+\&    s{/usr/local/}{/usr/share}g;      # better still
+.Ve
+.SS "I'm having trouble matching over more than one line. What's wrong?"
+.IX Xref "regex, multiline regexp, multiline regular expression, multiline"
+.IX Subsection "I'm having trouble matching over more than one line. What's wrong?"
+Either you don't have more than one line in the string you're looking
+at (probably), or else you aren't using the correct modifier(s) on
+your pattern (possibly).
+.PP
+There are many ways to get multiline data into a string. If you want
+it to happen automatically while reading input, you'll want to set $/
+(probably to '' for paragraphs or \f(CW\*(C`undef\*(C'\fR for the whole file) to
+allow you to read more than one line at a time.
+.PP
+Read perlre to help you decide which of \f(CW\*(C`/s\*(C'\fR and \f(CW\*(C`/m\*(C'\fR (or both)
+you might want to use: \f(CW\*(C`/s\*(C'\fR allows dot to include newline, and \f(CW\*(C`/m\*(C'\fR
+allows caret and dollar to match next to a newline, not just at the
+end of the string. You do need to make sure that you've actually
+got a multiline string in there.
+.PP
+For example, this program detects duplicate words, even when they span
+line breaks (but not paragraph ones). For this example, we don't need
+\&\f(CW\*(C`/s\*(C'\fR because we aren't using dot in a regular expression that we want
+to cross line boundaries. Neither do we need \f(CW\*(C`/m\*(C'\fR because we don't
+want caret or dollar to match at any point inside the record next
+to newlines. But it's imperative that $/ be set to something other
+than the default, or else we won't actually ever have a multiline
+record read in.
+.PP
+.Vb 6
+\&    $/ = \*(Aq\*(Aq;          # read in whole paragraph, not just one line
+\&    while ( <> ) {
+\&        while ( /\eb([\ew\*(Aq\-]+)(\es+\eg1)+\eb/gi ) {     # word starts alpha
+\&            print "Duplicate $1 at paragraph $.\en";
+\&        }
+\&    }
+.Ve
+.PP
+Here's some code that finds lines that begin with "From " (which would
+be mangled by many mailers):
+.PP
+.Vb 6
+\&    $/ = \*(Aq\*(Aq;          # read in whole paragraph, not just one line
+\&    while ( <> ) {
+\&        while ( /^From /gm ) { # /m makes ^ match next to \en
+\&            print "leading From in paragraph $.\en";
+\&        }
+\&    }
+.Ve
+.PP
+Here's code that finds everything between START and END in a file:
+.PP
+.Vb 6
+\&    undef $/;          # read in whole file, not just one line or paragraph
+\&    while ( <> ) {
+\&        while ( /START(.*?)END/sgm ) { # /s makes . cross line boundaries
+\&            print "$1\en";
+\&        }
+\&    }
+.Ve
+.SS "How can I pull out lines between two patterns that are themselves on different lines?"
+.IX Xref ".."
+.IX Subsection "How can I pull out lines between two patterns that are themselves on different lines?"
+You can use Perl's somewhat exotic \f(CW\*(C`..\*(C'\fR operator (documented in
+perlop):
+.PP
+.Vb 1
+\&    perl \-ne \*(Aqprint if /START/ .. /END/\*(Aq file1 file2 ...
+.Ve
+.PP
+If you wanted text and not lines, you would use
+.PP
+.Vb 1
+\&    perl \-0777 \-ne \*(Aqprint "$1\en" while /START(.*?)END/gs\*(Aq file1 file2 ...
+.Ve
+.PP
+But if you want nested occurrences of \f(CW\*(C`START\*(C'\fR through \f(CW\*(C`END\*(C'\fR, you'll
+run up against the problem described in the question in this section
+on matching balanced text.
+.PP
+Here's another example of using \f(CW\*(C`..\*(C'\fR:
+.PP
+.Vb 7
+\&    while (<>) {
+\&        my $in_header =   1  .. /^$/;
+\&        my $in_body   = /^$/ .. eof;
+\&        # now choose between them
+\&    } continue {
+\&        $. = 0 if eof;    # fix $.
+\&    }
+.Ve
+.SS "How do I match XML, HTML, or other nasty, ugly things with a regex?"
+.IX Xref "regex, XML regex, HTML XML HTML pain frustration sucking out, will to live"
+.IX Subsection "How do I match XML, HTML, or other nasty, ugly things with a regex?"
+Do not use regexes. Use a module and forget about the
+regular expressions. The XML::LibXML, HTML::TokeParser and
+HTML::TreeBuilder modules are good starts, although each namespace
+has other parsing modules specialized for certain tasks and different
+ways of doing it. Start at CPAN Search ( <http://metacpan.org/> )
+and wonder at all the work people have done for you already! :)
+.SS "I put a regular expression into $/ but it didn't work. What's wrong?"
+.IX Xref "$ , regexes in $INPUT_RECORD_SEPARATOR, regexes in $RS, regexes in"
+.IX Subsection "I put a regular expression into $/ but it didn't work. What's wrong?"
+$/ has to be a string. You can use these examples if you really need to
+do this.
+.PP
+If you have File::Stream, this is easy.
+.PP
+.Vb 1
+\&    use File::Stream;
+\&
+\&    my $stream = File::Stream\->new(
+\&        $filehandle,
+\&        separator => qr/\es*,\es*/,
+\&        );
+\&
+\&    print "$_\en" while <$stream>;
+.Ve
+.PP
+If you don't have File::Stream, you have to do a little more work.
+.PP
+You can use the four-argument form of sysread to continually add to
+a buffer. After you add to the buffer, you check if you have a
+complete line (using your regular expression).
+.PP
+.Vb 7
+\&    local $_ = "";
+\&    while( sysread FH, $_, 8192, length ) {
+\&        while( s/^((?s).*?)your_pattern// ) {
+\&            my $record = $1;
+\&            # do stuff here.
+\&        }
+\&    }
+.Ve
+.PP
+You can do the same thing with foreach and a match using the
+c flag and the \eG anchor, if you do not mind your entire file
+being in memory at the end.
+.PP
+.Vb 7
+\&    local $_ = "";
+\&    while( sysread FH, $_, 8192, length ) {
+\&        foreach my $record ( m/\eG((?s).*?)your_pattern/gc ) {
+\&            # do stuff here.
+\&        }
+\&        substr( $_, 0, pos ) = "" if pos;
+\&    }
+.Ve
+.SS "How do I substitute case-insensitively on the LHS while preserving case on the RHS?"
+.IX Xref "replace, case preserving substitute, case preserving substitution, case preserving s, case preserving"
+.IX Subsection "How do I substitute case-insensitively on the LHS while preserving case on the RHS?"
+Here's a lovely Perlish solution by Larry Rosler. It exploits
+properties of bitwise xor on ASCII strings.
+.PP
+.Vb 1
+\&    $_= "this is a TEsT case";
+\&
+\&    $old = \*(Aqtest\*(Aq;
+\&    $new = \*(Aqsuccess\*(Aq;
+\&
+\&    s{(\eQ$old\eE)}
+\&    { uc $new | (uc $1 ^ $1) .
+\&        (uc(substr $1, \-1) ^ substr $1, \-1) x
+\&        (length($new) \- length $1)
+\&    }egi;
+\&
+\&    print;
+.Ve
+.PP
+And here it is as a subroutine, modeled after the above:
+.PP
+.Vb 3
+\&    sub preserve_case {
+\&        my ($old, $new) = @_;
+\&        my $mask = uc $old ^ $old;
+\&
+\&        uc $new | $mask .
+\&            substr($mask, \-1) x (length($new) \- length($old))
+\&    }
+\&
+\&    $string = "this is a TEsT case";
+\&    $string =~ s/(test)/preserve_case($1, "success")/egi;
+\&    print "$string\en";
+.Ve
+.PP
+This prints:
+.PP
+.Vb 1
+\&    this is a SUcCESS case
+.Ve
+.PP
+As an alternative, to keep the case of the replacement word if it is
+longer than the original, you can use this code, by Jeff Pinyan:
+.PP
+.Vb 3
+\&    sub preserve_case {
+\&        my ($from, $to) = @_;
+\&        my ($lf, $lt) = map length, @_;
+\&
+\&        if ($lt < $lf) { $from = substr $from, 0, $lt }
+\&        else { $from .= substr $to, $lf }
+\&
+\&        return uc $to | ($from ^ uc $from);
+\&    }
+.Ve
+.PP
+This changes the sentence to "this is a SUcCess case."
+.PP
+Just to show that C programmers can write C in any programming language,
+if you prefer a more C\-like solution, the following script makes the
+substitution have the same case, letter by letter, as the original.
+(It also happens to run about 240% slower than the Perlish solution runs.)
+If the substitution has more characters than the string being substituted,
+the case of the last character is used for the rest of the substitution.
+.PP
+.Vb 8
+\&    # Original by Nathan Torkington, massaged by Jeffrey Friedl
+\&    #
+\&    sub preserve_case
+\&    {
+\&        my ($old, $new) = @_;
+\&        my $state = 0; # 0 = no change; 1 = lc; 2 = uc
+\&        my ($i, $oldlen, $newlen, $c) = (0, length($old), length($new));
+\&        my $len = $oldlen < $newlen ? $oldlen : $newlen;
+\&
+\&        for ($i = 0; $i < $len; $i++) {
+\&            if ($c = substr($old, $i, 1), $c =~ /[\eW\ed_]/) {
+\&                $state = 0;
+\&            } elsif (lc $c eq $c) {
+\&                substr($new, $i, 1) = lc(substr($new, $i, 1));
+\&                $state = 1;
+\&            } else {
+\&                substr($new, $i, 1) = uc(substr($new, $i, 1));
+\&                $state = 2;
+\&            }
+\&        }
+\&        # finish up with any remaining new (for when new is longer than old)
+\&        if ($newlen > $oldlen) {
+\&            if ($state == 1) {
+\&                substr($new, $oldlen) = lc(substr($new, $oldlen));
+\&            } elsif ($state == 2) {
+\&                substr($new, $oldlen) = uc(substr($new, $oldlen));
+\&            }
+\&        }
+\&        return $new;
+\&    }
+.Ve
+.ie n .SS "How can I make ""\ew"" match national character sets?"
+.el .SS "How can I make \f(CW\ew\fP match national character sets?"
+.IX Xref "\\w"
+.IX Subsection "How can I make w match national character sets?"
+Put \f(CW\*(C`use locale;\*(C'\fR in your script. The \ew character class is taken
+from the current locale.
+.PP
+See perllocale for details.
+.ie n .SS "How can I match a locale-smart version of ""/[a\-zA\-Z]/""?"
+.el .SS "How can I match a locale-smart version of \f(CW/[a\-zA\-Z]/\fP?"
+.IX Xref "alpha"
+.IX Subsection "How can I match a locale-smart version of /[a-zA-Z]/?"
+You can use the POSIX character class syntax \f(CW\*(C`/[[:alpha:]]/\*(C'\fR
+documented in perlre.
+.PP
+No matter which locale you are in, the alphabetic characters are
+the characters in \ew without the digits and the underscore.
+As a regex, that looks like \f(CW\*(C`/[^\eW\ed_]/\*(C'\fR. Its complement,
+the non-alphabetics, is then everything in \eW along with
+the digits and the underscore, or \f(CW\*(C`/[\eW\ed_]/\*(C'\fR.
+.SS "How can I quote a variable to use in a regex?"
+.IX Xref "regex, escaping regexp, escaping regular expression, escaping"
+.IX Subsection "How can I quote a variable to use in a regex?"
+The Perl parser will expand \f(CW$variable\fR and \f(CW@variable\fR references in
+regular expressions unless the delimiter is a single quote. Remember,
+too, that the right-hand side of a \f(CW\*(C`s///\*(C'\fR substitution is considered
+a double-quoted string (see perlop for more details). Remember
+also that any regex special characters will be acted on unless you
+precede the substitution with \eQ. Here's an example:
+.PP
+.Vb 2
+\&    $string = "Placido P. Octopus";
+\&    $regex  = "P.";
+\&
+\&    $string =~ s/$regex/Polyp/;
+\&    # $string is now "Polypacido P. Octopus"
+.Ve
+.PP
+Because \f(CW\*(C`.\*(C'\fR is special in regular expressions, and can match any
+single character, the regex \f(CW\*(C`P.\*(C'\fR here has matched the \f(CW\*(C`Pl\*(C'\fR in the
+original string.
+.PP
+To escape the special meaning of \f(CW\*(C`.\*(C'\fR, we use \f(CW\*(C`\eQ\*(C'\fR:
+.PP
+.Vb 2
+\&    $string = "Placido P. Octopus";
+\&    $regex  = "P.";
+\&
+\&    $string =~ s/\eQ$regex/Polyp/;
+\&    # $string is now "Placido Polyp Octopus"
+.Ve
+.PP
+The use of \f(CW\*(C`\eQ\*(C'\fR causes the \f(CW\*(C`.\*(C'\fR in the regex to be treated as a
+regular character, so that \f(CW\*(C`P.\*(C'\fR matches a \f(CW\*(C`P\*(C'\fR followed by a dot.
+.ie n .SS "What is ""/o"" really for?"
+.el .SS "What is \f(CW/o\fP really for?"
+.IX Xref " o, regular expressions compile, regular expressions"
+.IX Subsection "What is /o really for?"
+(contributed by brian d foy)
+.PP
+The \f(CW\*(C`/o\*(C'\fR option for regular expressions (documented in perlop and
+perlreref) tells Perl to compile the regular expression only once.
+This is only useful when the pattern contains a variable. Perls 5.6
+and later handle this automatically if the pattern does not change.
+.PP
+Since the match operator \f(CW\*(C`m//\*(C'\fR, the substitution operator \f(CW\*(C`s///\*(C'\fR,
+and the regular expression quoting operator \f(CW\*(C`qr//\*(C'\fR are double-quotish
+constructs, you can interpolate variables into the pattern. See the
+answer to "How can I quote a variable to use in a regex?" for more
+details.
+.PP
+This example takes a regular expression from the argument list and
+prints the lines of input that match it:
+.PP
+.Vb 1
+\&    my $pattern = shift @ARGV;
+\&
+\&    while( <> ) {
+\&        print if m/$pattern/;
+\&    }
+.Ve
+.PP
+Versions of Perl prior to 5.6 would recompile the regular expression
+for each iteration, even if \f(CW$pattern\fR had not changed. The \f(CW\*(C`/o\*(C'\fR
+would prevent this by telling Perl to compile the pattern the first
+time, then reuse that for subsequent iterations:
+.PP
+.Vb 1
+\&    my $pattern = shift @ARGV;
+\&
+\&    while( <> ) {
+\&        print if m/$pattern/o; # useful for Perl < 5.6
+\&    }
+.Ve
+.PP
+In versions 5.6 and later, Perl won't recompile the regular expression
+if the variable hasn't changed, so you probably don't need the \f(CW\*(C`/o\*(C'\fR
+option. It doesn't hurt, but it doesn't help either. If you want any
+version of Perl to compile the regular expression only once even if
+the variable changes (thus, only using its initial value), you still
+need the \f(CW\*(C`/o\*(C'\fR.
+.PP
+You can watch Perl's regular expression engine at work to verify for
+yourself if Perl is recompiling a regular expression. The \f(CWuse re
+\&\*(Aqdebug\*(Aq\fR pragma (comes with Perl 5.005 and later) shows the details.
+With Perls before 5.6, you should see \f(CW\*(C`re\*(C'\fR reporting that it's
+compiling the regular expression on each iteration. With Perl 5.6 or
+later, you should only see \f(CW\*(C`re\*(C'\fR report that for the first iteration.
+.PP
+.Vb 1
+\&    use re \*(Aqdebug\*(Aq;
+\&
+\&    my $regex = \*(AqPerl\*(Aq;
+\&    foreach ( qw(Perl Java Ruby Python) ) {
+\&        print STDERR "\-" x 73, "\en";
+\&        print STDERR "Trying $_...\en";
+\&        print STDERR "\et$_ is good!\en" if m/$regex/;
+\&    }
+.Ve
+.SS "How do I use a regular expression to strip C\-style comments from a file?"
+.IX Subsection "How do I use a regular expression to strip C-style comments from a file?"
+While this actually can be done, it's much harder than you'd think.
+For example, this one-liner
+.PP
+.Vb 1
+\&    perl \-0777 \-pe \*(Aqs{/\e*.*?\e*/}{}gs\*(Aq foo.c
+.Ve
+.PP
+will work in many but not all cases. You see, it's too simple-minded for
+certain kinds of C programs, in particular, those with what appear to be
+comments in quoted strings. For that, you'd need something like this,
+created by Jeffrey Friedl and later modified by Fred Curtis.
+.PP
+.Vb 4
+\&    $/ = undef;
+\&    $_ = <>;
+\&    s#/\e*[^*]*\e*+([^/*][^*]*\e*+)*/|("(\e\e.|[^"\e\e])*"|\*(Aq(\e\e.|[^\*(Aq\e\e])*\*(Aq|.[^/"\*(Aq\e\e]*)#defined $2 ? $2 : ""#gse;
+\&    print;
+.Ve
+.PP
+This could, of course, be more legibly written with the \f(CW\*(C`/x\*(C'\fR modifier, adding
+whitespace and comments. Here it is expanded, courtesy of Fred Curtis.
+.PP
+.Vb 8
+\&    s{
+\&       /\e*         ##  Start of /* ... */ comment
+\&       [^*]*\e*+    ##  Non\-* followed by 1\-or\-more *\*(Aqs
+\&       (
+\&         [^/*][^*]*\e*+
+\&       )*          ##  0\-or\-more things which don\*(Aqt start with /
+\&                   ##    but do end with \*(Aq*\*(Aq
+\&       /           ##  End of /* ... */ comment
+\&
+\&     |         ##     OR  various things which aren\*(Aqt comments:
+\&
+\&       (
+\&         "           ##  Start of " ... " string
+\&         (
+\&           \e\e.           ##  Escaped char
+\&         |               ##    OR
+\&           [^"\e\e]        ##  Non "\e
+\&         )*
+\&         "           ##  End of " ... " string
+\&
+\&       |         ##     OR
+\&
+\&         \*(Aq           ##  Start of \*(Aq ... \*(Aq string
+\&         (
+\&           \e\e.           ##  Escaped char
+\&         |               ##    OR
+\&           [^\*(Aq\e\e]        ##  Non \*(Aq\e
+\&         )*
+\&         \*(Aq           ##  End of \*(Aq ... \*(Aq string
+\&
+\&       |         ##     OR
+\&
+\&         .           ##  Any other char
+\&         [^/"\*(Aq\e\e]*   ##  Chars which don\*(Aqt start a comment, string or escape
+\&       )
+\&     }{defined $2 ? $2 : ""}gxse;
+.Ve
+.PP
+A slight modification also removes C++ comments, possibly spanning multiple lines
+using a continuation character:
+.PP
+.Vb 1
+\& s#/\e*[^*]*\e*+([^/*][^*]*\e*+)*/|//([^\e\e]|[^\en][\en]?)*?\en|("(\e\e.|[^"\e\e])*"|\*(Aq(\e\e.|[^\*(Aq\e\e])*\*(Aq|.[^/"\*(Aq\e\e]*)#defined $3 ? $3 : ""#gse;
+.Ve
+.SS "Can I use Perl regular expressions to match balanced text?"
+.IX Xref "regex, matching balanced test regexp, matching balanced test regular expression, matching balanced test possessive PARNO Text::Balanced Regexp::Common backtracking recursion"
+.IX Subsection "Can I use Perl regular expressions to match balanced text?"
+(contributed by brian d foy)
+.PP
+Your first try should probably be the Text::Balanced module, which
+has been in the Perl standard library since Perl 5.8. It has a variety of
+functions to deal with tricky text. The Regexp::Common module can
+also help by providing canned patterns you can use.
+.PP
+As of Perl 5.10, you can match balanced text with regular expressions
+using recursive patterns. Before Perl 5.10, you had to resort to
+various tricks such as using Perl code in \f(CW\*(C`(??{})\*(C'\fR sequences.
+.PP
+Here's an example using a recursive regular expression. The goal is to
+capture all of the text within angle brackets, including the text in
+nested angle brackets. This sample text has two "major" groups: a
+group with one level of nesting and a group with two levels of
+nesting. There are five total groups in angle brackets:
+.PP
+.Vb 3
+\&    I have some <brackets in <nested brackets> > and
+\&    <another group <nested once <nested twice> > >
+\&    and that\*(Aqs it.
+.Ve
+.PP
+The regular expression to match the balanced text uses two new (to
+Perl 5.10) regular expression features. These are covered in perlre
+and this example is a modified version of one in that documentation.
+.PP
+First, adding the new possessive \f(CW\*(C`+\*(C'\fR to any quantifier finds the
+longest match and does not backtrack. That's important since you want
+to handle any angle brackets through the recursion, not backtracking.
+The group \f(CW\*(C`[^<>]++\*(C'\fR finds one or more non-angle brackets without
+backtracking.
+.PP
+Second, the new \f(CW\*(C`(?PARNO)\*(C'\fR refers to the sub-pattern in the
+particular capture group given by \f(CW\*(C`PARNO\*(C'\fR. In the following regex,
+the first capture group finds (and remembers) the balanced text, and
+you need that same pattern within the first buffer to get past the
+nested text. That's the recursive part. The \f(CW\*(C`(?1)\*(C'\fR uses the pattern
+in the outer capture group as an independent part of the regex.
+.PP
+Putting it all together, you have:
+.PP
+.Vb 1
+\&    #!/usr/local/bin/perl5.10.0
+\&
+\&    my $string =<<"HERE";
+\&    I have some <brackets in <nested brackets> > and
+\&    <another group <nested once <nested twice> > >
+\&    and that\*(Aqs it.
+\&    HERE
+\&
+\&    my @groups = $string =~ m/
+\&            (                   # start of capture group 1
+\&            <                   # match an opening angle bracket
+\&                (?:
+\&                    [^<>]++     # one or more non angle brackets, non backtracking
+\&                      |
+\&                    (?1)        # found < or >, so recurse to capture group 1
+\&                )*
+\&            >                   # match a closing angle bracket
+\&            )                   # end of capture group 1
+\&            /xg;
+\&
+\&    $" = "\en\et";
+\&    print "Found:\en\et@groups\en";
+.Ve
+.PP
+The output shows that Perl found the two major groups:
+.PP
+.Vb 3
+\&    Found:
+\&        <brackets in <nested brackets> >
+\&        <another group <nested once <nested twice> > >
+.Ve
+.PP
+With a little extra work, you can get all of the groups in angle
+brackets even if they are in other angle brackets too. Each time you
+get a balanced match, remove its outer delimiter (that's the one you
+just matched so don't match it again) and add it to a queue of strings
+to process. Keep doing that until you get no matches:
+.PP
+.Vb 1
+\&    #!/usr/local/bin/perl5.10.0
+\&
+\&    my @queue =<<"HERE";
+\&    I have some <brackets in <nested brackets> > and
+\&    <another group <nested once <nested twice> > >
+\&    and that\*(Aqs it.
+\&    HERE
+\&
+\&    my $regex = qr/
+\&            (                   # start of bracket 1
+\&            <                   # match an opening angle bracket
+\&                (?:
+\&                    [^<>]++     # one or more non angle brackets, non backtracking
+\&                      |
+\&                    (?1)        # recurse to bracket 1
+\&                )*
+\&            >                   # match a closing angle bracket
+\&            )                   # end of bracket 1
+\&            /x;
+\&
+\&    $" = "\en\et";
+\&
+\&    while( @queue ) {
+\&        my $string = shift @queue;
+\&
+\&        my @groups = $string =~ m/$regex/g;
+\&        print "Found:\en\et@groups\en\en" if @groups;
+\&
+\&        unshift @queue, map { s/^<//; s/>$//; $_ } @groups;
+\&    }
+.Ve
+.PP
+The output shows all of the groups. The outermost matches show up
+first and the nested matches show up later:
+.PP
+.Vb 3
+\&    Found:
+\&        <brackets in <nested brackets> >
+\&        <another group <nested once <nested twice> > >
+\&
+\&    Found:
+\&        <nested brackets>
+\&
+\&    Found:
+\&        <nested once <nested twice> >
+\&
+\&    Found:
+\&        <nested twice>
+.Ve
+.SS "What does it mean that regexes are greedy? How can I get around it?"
+.IX Xref "greedy greediness"
+.IX Subsection "What does it mean that regexes are greedy? How can I get around it?"
+Most people mean that greedy regexes match as much as they can.
+Technically speaking, it's actually the quantifiers (\f(CW\*(C`?\*(C'\fR, \f(CW\*(C`*\*(C'\fR, \f(CW\*(C`+\*(C'\fR,
+\&\f(CW\*(C`{}\*(C'\fR) that are greedy rather than the whole pattern; Perl prefers local
+greed and immediate gratification to overall greed. To get non-greedy
+versions of the same quantifiers, use (\f(CW\*(C`??\*(C'\fR, \f(CW\*(C`*?\*(C'\fR, \f(CW\*(C`+?\*(C'\fR, \f(CW\*(C`{}?\*(C'\fR).
+.PP
+An example:
+.PP
+.Vb 3
+\&    my $s1 = my $s2 = "I am very very cold";
+\&    $s1 =~ s/ve.*y //;      # I am cold
+\&    $s2 =~ s/ve.*?y //;     # I am very cold
+.Ve
+.PP
+Notice how the second substitution stopped matching as soon as it
+encountered "y ". The \f(CW\*(C`*?\*(C'\fR quantifier effectively tells the regular
+expression engine to find a match as quickly as possible and pass
+control on to whatever is next in line, as you would if you were
+playing hot potato.
+.SS "How do I process each word on each line?"
+.IX Xref "word"
+.IX Subsection "How do I process each word on each line?"
+Use the split function:
+.PP
+.Vb 5
+\&    while (<>) {
+\&        foreach my $word ( split ) {
+\&            # do something with $word here
+\&        }
+\&    }
+.Ve
+.PP
+Note that these aren't really words in the English sense; they're just
+chunks of consecutive non-whitespace characters.
+.PP
+To work with only alphanumeric sequences (including underscores), you
+might consider
+.PP
+.Vb 5
+\&    while (<>) {
+\&        foreach $word (m/(\ew+)/g) {
+\&            # do something with $word here
+\&        }
+\&    }
+.Ve
+.SS "How can I print out a word-frequency or line-frequency summary?"
+.IX Subsection "How can I print out a word-frequency or line-frequency summary?"
+To do this, you have to parse out each word in the input stream. We'll
+pretend that by word you mean chunk of alphabetics, hyphens, or
+apostrophes, rather than the non-whitespace chunk idea of a word given
+in the previous question:
+.PP
+.Vb 6
+\&    my (%seen);
+\&    while (<>) {
+\&        while ( /(\eb[^\eW_\ed][\ew\*(Aq\-]+\eb)/g ) {   # misses "\`sheep\*(Aq"
+\&            $seen{$1}++;
+\&        }
+\&    }
+\&
+\&    while ( my ($word, $count) = each %seen ) {
+\&        print "$count $word\en";
+\&    }
+.Ve
+.PP
+If you wanted to do the same thing for lines, you wouldn't need a
+regular expression:
+.PP
+.Vb 1
+\&    my (%seen);
+\&
+\&    while (<>) {
+\&        $seen{$_}++;
+\&    }
+\&
+\&    while ( my ($line, $count) = each %seen ) {
+\&        print "$count $line";
+\&    }
+.Ve
+.PP
+If you want these output in a sorted order, see perlfaq4: "How do I
+sort a hash (optionally by value instead of key)?".
+.SS "How can I do approximate matching?"
+.IX Xref "match, approximate matching, approximate"
+.IX Subsection "How can I do approximate matching?"
+See the module String::Approx available from CPAN.
+.SS "How do I efficiently match many regular expressions at once?"
+.IX Xref "regex, efficiency regexp, efficiency regular expression, efficiency"
+.IX Subsection "How do I efficiently match many regular expressions at once?"
+(contributed by brian d foy)
+.PP
+You want to
+avoid compiling a regular expression every time you want to match it.
+In this example, perl must recompile the regular expression for every
+iteration of the \f(CW\*(C`foreach\*(C'\fR loop since \f(CW$pattern\fR can change:
+.PP
+.Vb 1
+\&    my @patterns = qw( fo+ ba[rz] );
+\&
+\&    LINE: while( my $line = <> ) {
+\&        foreach my $pattern ( @patterns ) {
+\&            if( $line =~ m/\eb$pattern\eb/i ) {
+\&                print $line;
+\&                next LINE;
+\&            }
+\&        }
+\&    }
+.Ve
+.PP
+The \f(CW\*(C`qr//\*(C'\fR operator compiles a regular
+expression, but doesn't apply it. When you use the pre-compiled
+version of the regex, perl does less work. In this example, I inserted
+a \f(CW\*(C`map\*(C'\fR to turn each pattern into its pre-compiled form. The rest of
+the script is the same, but faster:
+.PP
+.Vb 1
+\&    my @patterns = map { qr/\eb$_\eb/i } qw( fo+ ba[rz] );
+\&
+\&    LINE: while( my $line = <> ) {
+\&        foreach my $pattern ( @patterns ) {
+\&            if( $line =~ m/$pattern/ ) {
+\&                print $line;
+\&                next LINE;
+\&            }
+\&        }
+\&    }
+.Ve
+.PP
+In some cases, you may be able to make several patterns into a single
+regular expression. Beware of situations that require backtracking
+though. In this example, the regex is only compiled once because
+\&\f(CW$regex\fR doesn't change between iterations:
+.PP
+.Vb 1
+\&    my $regex = join \*(Aq|\*(Aq, qw( fo+ ba[rz] );
+\&
+\&    while( my $line = <> ) {
+\&        print $line if $line =~ m/\eb(?:$regex)\eb/i;
+\&    }
+.Ve
+.PP
+The function "list2re" in Data::Munge on CPAN can also be used to form
+a single regex that matches a list of literal strings (not regexes).
+.PP
+For more details on regular expression efficiency, see \fIMastering
+Regular Expressions\fR by Jeffrey Friedl. He explains how the regular
+expressions engine works and why some patterns are surprisingly
+inefficient. Once you understand how perl applies regular expressions,
+you can tune them for individual situations.
+.ie n .SS "Why don't word-boundary searches with ""\eb"" work for me?"
+.el .SS "Why don't word-boundary searches with \f(CW\eb\fP work for me?"
+.IX Xref "\\b"
+.IX Subsection "Why don't word-boundary searches with b work for me?"
+(contributed by brian d foy)
+.PP
+Ensure that you know what \eb really does: it's the boundary between a
+word character, \ew, and something that isn't a word character. That
+thing that isn't a word character might be \eW, but it can also be the
+start or end of the string.
+.PP
+It's not (not!) the boundary between whitespace and non-whitespace,
+and it's not the stuff between words we use to create sentences.
+.PP
+In regex speak, a word boundary (\eb) is a "zero width assertion",
+meaning that it doesn't represent a character in the string, but a
+condition at a certain position.
+.PP
+For the regular expression, /\ebPerl\eb/, there has to be a word
+boundary before the "P" and after the "l". As long as something other
+than a word character precedes the "P" and follows the "l", the
+pattern will match. These strings match /\ebPerl\eb/.
+.PP
+.Vb 4
+\&    "Perl"    # no word char before "P" or after "l"
+\&    "Perl "   # same as previous (space is not a word char)
+\&    "\*(AqPerl\*(Aq"  # the "\*(Aq" char is not a word char
+\&    "Perl\*(Aqs"  # no word char before "P", non\-word char after "l"
+.Ve
+.PP
+These strings do not match /\ebPerl\eb/.
+.PP
+.Vb 2
+\&    "Perl_"   # "_" is a word char!
+\&    "Perler"  # no word char before "P", but one after "l"
+.Ve
+.PP
+You don't have to use \eb to match words though. You can look for
+non-word characters surrounded by word characters. These strings
+match the pattern /\eb'\eb/.
+.PP
+.Vb 2
+\&    "don\*(Aqt"   # the "\*(Aq" char is surrounded by "n" and "t"
+\&    "qep\*(Aqa\*(Aq"  # the "\*(Aq" char is surrounded by "p" and "a"
+.Ve
+.PP
+These strings do not match /\eb'\eb/.
+.PP
+.Vb 1
+\&    "foo\*(Aq"    # there is no word char after non\-word "\*(Aq"
+.Ve
+.PP
+You can also use the complement of \eb, \eB, to specify that there
+should not be a word boundary.
+.PP
+In the pattern /\eBam\eB/, there must be a word character before the "a"
+and after the "m". These strings match /\eBam\eB/:
+.PP
+.Vb 2
+\&    "llama"   # "am" surrounded by word chars
+\&    "Samuel"  # same
+.Ve
+.PP
+These strings do not match /\eBam\eB/:
+.PP
+.Vb 2
+\&    "Sam"      # no word boundary before "a", but one after "m"
+\&    "I am Sam" # "am" surrounded by non\-word chars
+.Ve
+.SS "Why does using $&, $`, or $' slow my program down?"
+.IX Xref "$MATCH $& $POSTMATCH $' $PREMATCH $`"
+.IX Subsection "Why does using $&, $`, or $' slow my program down?"
+(contributed by Anno Siegel)
+.PP
+Once Perl sees that you need one of these variables anywhere in the
+program, it provides them on each and every pattern match. That means
+that on every pattern match the entire string will be copied, part of it
+to $`, part to $&, and part to $'. Thus the penalty is most severe with
+long strings and patterns that match often. Avoid $&, $', and $` if you
+can, but if you can't, once you've used them at all, use them at will
+because you've already paid the price. Remember that some algorithms
+really appreciate them. As of the 5.005 release, the $& variable is no
+longer "expensive" the way the other two are.
+.PP
+Since Perl 5.6.1 the special variables @\- and @+ can functionally replace
+$`, $& and $'. These arrays contain the offsets of the beginning and end
+of each match (see perlvar for the full story), so they give you
+essentially the same information, but without the risk of excessive
+string copying.
+.PP
+Perl 5.10 added three specials, \f(CW\*(C`${^MATCH}\*(C'\fR, \f(CW\*(C`${^PREMATCH}\*(C'\fR, and
+\&\f(CW\*(C`${^POSTMATCH}\*(C'\fR to do the same job but without the global performance
+penalty. Perl 5.10 only sets these variables if you compile or execute the
+regular expression with the \f(CW\*(C`/p\*(C'\fR modifier.
+.ie n .SS "What good is ""\eG"" in a regular expression?"
+.el .SS "What good is \f(CW\eG\fP in a regular expression?"
+.IX Xref "\\G"
+.IX Subsection "What good is G in a regular expression?"
+You use the \f(CW\*(C`\eG\*(C'\fR anchor to start the next match on the same
+string where the last match left off. The regular
+expression engine cannot skip over any characters to find
+the next match with this anchor, so \f(CW\*(C`\eG\*(C'\fR is similar to the
+beginning of string anchor, \f(CW\*(C`^\*(C'\fR. The \f(CW\*(C`\eG\*(C'\fR anchor is typically
+used with the \f(CW\*(C`g\*(C'\fR modifier. It uses the value of \f(CWpos()\fR
+as the position to start the next match. As the match
+operator makes successive matches, it updates \f(CWpos()\fR with the
+position of the next character past the last match (or the
+first character of the next match, depending on how you like
+to look at it). Each string has its own \f(CWpos()\fR value.
+.PP
+Suppose you want to match all of the consecutive pairs of digits
+in a string like "1122a44" and stop matching when you
+encounter non-digits. You want to match \f(CW11\fR and \f(CW22\fR but
+the letter \f(CW\*(C`a\*(C'\fR shows up between \f(CW22\fR and \f(CW44\fR and you want
+to stop at \f(CW\*(C`a\*(C'\fR. Simply matching pairs of digits skips over
+the \f(CW\*(C`a\*(C'\fR and still matches \f(CW44\fR.
+.PP
+.Vb 2
+\&    $_ = "1122a44";
+\&    my @pairs = m/(\ed\ed)/g;   # qw( 11 22 44 )
+.Ve
+.PP
+If you use the \f(CW\*(C`\eG\*(C'\fR anchor, you force the match after \f(CW22\fR to
+start with the \f(CW\*(C`a\*(C'\fR. The regular expression cannot match
+there since it does not find a digit, so the next match
+fails and the match operator returns the pairs it already
+found.
+.PP
+.Vb 2
+\&    $_ = "1122a44";
+\&    my @pairs = m/\eG(\ed\ed)/g; # qw( 11 22 )
+.Ve
+.PP
+You can also use the \f(CW\*(C`\eG\*(C'\fR anchor in scalar context. You
+still need the \f(CW\*(C`g\*(C'\fR modifier.
+.PP
+.Vb 4
+\&    $_ = "1122a44";
+\&    while( m/\eG(\ed\ed)/g ) {
+\&        print "Found $1\en";
+\&    }
+.Ve
+.PP
+After the match fails at the letter \f(CW\*(C`a\*(C'\fR, perl resets \f(CWpos()\fR
+and the next match on the same string starts at the beginning.
+.PP
+.Vb 4
+\&    $_ = "1122a44";
+\&    while( m/\eG(\ed\ed)/g ) {
+\&        print "Found $1\en";
+\&    }
+\&
+\&    print "Found $1 after while" if m/(\ed\ed)/g; # finds "11"
+.Ve
+.PP
+You can disable \f(CWpos()\fR resets on fail with the \f(CW\*(C`c\*(C'\fR modifier, documented
+in perlop and perlreref. Subsequent matches start where the last
+successful match ended (the value of \f(CWpos()\fR) even if a match on the
+same string has failed in the meantime. In this case, the match after
+the \f(CWwhile()\fR loop starts at the \f(CW\*(C`a\*(C'\fR (where the last match stopped),
+and since it does not use any anchor it can skip over the \f(CW\*(C`a\*(C'\fR to find
+\&\f(CW44\fR.
+.PP
+.Vb 4
+\&    $_ = "1122a44";
+\&    while( m/\eG(\ed\ed)/gc ) {
+\&        print "Found $1\en";
+\&    }
+\&
+\&    print "Found $1 after while" if m/(\ed\ed)/g; # finds "44"
+.Ve
+.PP
+Typically you use the \f(CW\*(C`\eG\*(C'\fR anchor with the \f(CW\*(C`c\*(C'\fR modifier
+when you want to try a different match if one fails,
+such as in a tokenizer. Jeffrey Friedl offers this example
+which works in 5.004 or later.
+.PP
+.Vb 9
+\&    while (<>) {
+\&        chomp;
+\&        PARSER: {
+\&            m/ \eG( \ed+\eb    )/gcx   && do { print "number: $1\en";  redo; };
+\&            m/ \eG( \ew+      )/gcx   && do { print "word:   $1\en";  redo; };
+\&            m/ \eG( \es+      )/gcx   && do { print "space:  $1\en";  redo; };
+\&            m/ \eG( [^\ew\ed]+ )/gcx   && do { print "other:  $1\en";  redo; };
+\&        }
+\&    }
+.Ve
+.PP
+For each line, the \f(CW\*(C`PARSER\*(C'\fR loop first tries to match a series
+of digits followed by a word boundary. This match has to
+start at the place the last match left off (or the beginning
+of the string on the first match). Since \f(CW\*(C`m/ \eG( \ed+\eb
+)/gcx\*(C'\fR uses the \f(CW\*(C`c\*(C'\fR modifier, if the string does not match that
+regular expression, perl does not reset \fBpos()\fR and the next
+match starts at the same position to try a different
+pattern.
+.SS "Are Perl regexes DFAs or NFAs? Are they POSIX compliant?"
+.IX Xref "DFA NFA POSIX"
+.IX Subsection "Are Perl regexes DFAs or NFAs? Are they POSIX compliant?"
+While it's true that Perl's regular expressions resemble the DFAs
+(deterministic finite automata) of the \fBegrep\fR\|(1) program, they are in
+fact implemented as NFAs (non-deterministic finite automata) to allow
+backtracking and backreferencing. And they aren't POSIX-style either,
+because those guarantee worst-case behavior for all cases. (It seems
+that some people prefer guarantees of consistency, even when what's
+guaranteed is slowness.) See the book "Mastering Regular Expressions"
+(from O'Reilly) by Jeffrey Friedl for all the details you could ever
+hope to know on these matters (a full citation appears in
+perlfaq2).
+.SS "What's wrong with using grep in a void context?"
+.IX Xref "grep"
+.IX Subsection "What's wrong with using grep in a void context?"
+The problem is that grep builds a return list, regardless of the context.
+This means you're making Perl go to the trouble of building a list that
+you then just throw away. If the list is large, you waste both time and space.
+If your intent is to iterate over the list, then use a for loop for this
+purpose.
+.PP
+In perls older than 5.8.1, map suffers from this problem as well.
+But since 5.8.1, this has been fixed, and map is context aware \- in void
+context, no lists are constructed.
+.SS "How can I match strings with multibyte characters?"
+.IX Xref "regex, and multibyte characters regexp, and multibyte characters regular expression, and multibyte characters martian encoding, Martian"
+.IX Subsection "How can I match strings with multibyte characters?"
+Starting from Perl 5.6 Perl has had some level of multibyte character
+support. Perl 5.8 or later is recommended. Supported multibyte
+character repertoires include Unicode, and legacy encodings
+through the Encode module. See perluniintro, perlunicode,
+and Encode.
+.PP
+If you are stuck with older Perls, you can do Unicode with the
+Unicode::String module, and character conversions using the
+Unicode::Map8 and Unicode::Map modules. If you are using
+Japanese encodings, you might try using jperl 5.005_03.
+.PP
+Finally, the following set of approaches was offered by Jeffrey
+Friedl, whose article in issue #5 of The Perl Journal talks about
+this very matter.
+.PP
+Let's suppose you have some weird Martian encoding where pairs of
+ASCII uppercase letters encode single Martian letters (e.g. the two
+bytes "CV" make a single Martian letter, as do the two bytes "SG",
+"VS", "XX", etc.). Other bytes represent single characters, just like
+ASCII.
+.PP
+So, the string of Martian "I am CVSGXX!" uses 12 bytes to encode the
+nine characters 'I', ' ', 'a', 'm', ' ', 'CV', 'SG', 'XX', '!'.
+.PP
+Now, say you want to search for the single character \f(CW\*(C`/GX/\*(C'\fR. Perl
+doesn't know about Martian, so it'll find the two bytes "GX" in the "I
+am CVSGXX!" string, even though that character isn't there: it just
+looks like it is because "SG" is next to "XX", but there's no real
+"GX". This is a big problem.
+.PP
+Here are a few ways, all painful, to deal with it:
+.PP
+.Vb 2
+\&    # Make sure adjacent "martian" bytes are no longer adjacent.
+\&    $martian =~ s/([A\-Z][A\-Z])/ $1 /g;
+\&
+\&    print "found GX!\en" if $martian =~ /GX/;
+.Ve
+.PP
+Or like this:
+.PP
+.Vb 6
+\&    my @chars = $martian =~ m/([A\-Z][A\-Z]|[^A\-Z])/g;
+\&    # above is conceptually similar to:     my @chars = $text =~ m/(.)/g;
+\&    #
+\&    foreach my $char (@chars) {
+\&        print("found GX!\en"), last if $char eq \*(AqGX\*(Aq;
+\&    }
+.Ve
+.PP
+Or like this:
+.PP
+.Vb 6
+\&    while ($martian =~ m/\eG([A\-Z][A\-Z]|.)/gs) {  # \eG probably unneeded
+\&        if ($1 eq \*(AqGX\*(Aq) {
+\&            print "found GX!\en";
+\&            last;
+\&        }
+\&    }
+.Ve
+.PP
+Here's another, slightly less painful, way to do it from Benjamin
+Goldberg, who uses a zero-width negative look-behind assertion.
+.PP
+.Vb 5
+\&    print "found GX!\en" if    $martian =~ m/
+\&        (?<![A\-Z])
+\&        (?:[A\-Z][A\-Z])*?
+\&        GX
+\&        /x;
+.Ve
+.PP
+This succeeds if the "martian" character GX is in the string, and fails
+otherwise. If you don't like using (?<!), a zero-width negative
+look-behind assertion, you can replace (?<![A\-Z]) with (?:^|[^A\-Z]).
+.PP
+It does have the drawback of putting the wrong thing in $\-[0] and $+[0],
+but this usually can be worked around.
+.SS "How do I match a regular expression that's in a variable?"
+.IX Xref "regex, in variable eval regex quotemeta \\Q, regex \\E, regex qr"
+.IX Subsection "How do I match a regular expression that's in a variable?"
+(contributed by brian d foy)
+.PP
+We don't have to hard-code patterns into the match operator (or
+anything else that works with regular expressions). We can put the
+pattern in a variable for later use.
+.PP
+The match operator is a double quote context, so you can interpolate
+your variable just like a double quoted string. In this case, you
+read the regular expression as user input and store it in \f(CW$regex\fR.
+Once you have the pattern in \f(CW$regex\fR, you use that variable in the
+match operator.
+.PP
+.Vb 1
+\&    chomp( my $regex = <STDIN> );
+\&
+\&    if( $string =~ m/$regex/ ) { ... }
+.Ve
+.PP
+Any regular expression special characters in \f(CW$regex\fR are still
+special, and the pattern still has to be valid or Perl will complain.
+For instance, in this pattern there is an unpaired parenthesis.
+.PP
+.Vb 1
+\&    my $regex = "Unmatched ( paren";
+\&
+\&    "Two parens to bind them all" =~ m/$regex/;
+.Ve
+.PP
+When Perl compiles the regular expression, it treats the parenthesis
+as the start of a capture group. When it doesn't find the closing
+parenthesis, it complains:
+.PP
+.Vb 1
+\&    Unmatched ( in regex; marked by <\-\- HERE in m/Unmatched ( <\-\- HERE  paren/ at script line 3.
+.Ve
+.PP
+You can get around this in several ways depending on your situation.
+First, if you don't want any of the characters in the string to be
+special, you can escape them with \f(CW\*(C`quotemeta\*(C'\fR before you use the string.
+.PP
+.Vb 2
+\&    chomp( my $regex = <STDIN> );
+\&    $regex = quotemeta( $regex );
+\&
+\&    if( $string =~ m/$regex/ ) { ... }
+.Ve
+.PP
+You can also do this directly in the match operator using the \f(CW\*(C`\eQ\*(C'\fR
+and \f(CW\*(C`\eE\*(C'\fR sequences. The \f(CW\*(C`\eQ\*(C'\fR tells Perl where to start escaping
+special characters, and the \f(CW\*(C`\eE\*(C'\fR tells it where to stop (see perlop
+for more details).
+.PP
+.Vb 1
+\&    chomp( my $regex = <STDIN> );
+\&
+\&    if( $string =~ m/\eQ$regex\eE/ ) { ... }
+.Ve
+.PP
+Alternatively, you can use \f(CW\*(C`qr//\*(C'\fR, the regular expression quote operator (see
+perlop for more details). It quotes and perhaps compiles the pattern,
+and you can apply regular expression flags to the pattern.
+.PP
+.Vb 1
+\&    chomp( my $input = <STDIN> );
+\&
+\&    my $regex = qr/$input/is;
+\&
+\&    $string =~ m/$regex/  # same as m/$input/is;
+.Ve
+.PP
+You might also want to trap any errors by wrapping an \f(CW\*(C`eval\*(C'\fR block
+around the whole thing.
+.PP
+.Vb 1
+\&    chomp( my $input = <STDIN> );
+\&
+\&    eval {
+\&        if( $string =~ m/\eQ$input\eE/ ) { ... }
+\&    };
+\&    warn $@ if $@;
+.Ve
+.PP
+Or...
+.PP
+.Vb 7
+\&    my $regex = eval { qr/$input/is };
+\&    if( defined $regex ) {
+\&        $string =~ m/$regex/;
+\&    }
+\&    else {
+\&        warn $@;
+\&    }
+.Ve
+.SH "AUTHOR AND COPYRIGHT"
+.IX Header "AUTHOR AND COPYRIGHT"
+Copyright (c) 1997\-2010 Tom Christiansen, Nathan Torkington, and
+other authors as noted. All rights reserved.
+.PP
+This documentation is free; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+.PP
+Irrespective of its distribution, all code examples in this file
+are hereby placed into the public domain. You are permitted and
+encouraged to use this code in your own programs for fun
+or for profit as you see fit. A simple comment in the code giving
+credit would be courteous but is not required.