summaryrefslogtreecommitdiffstats
path: root/src/regexp/exec_test.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:25:22 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:25:22 +0000
commitf6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch)
tree7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/regexp/exec_test.go
parentInitial commit. (diff)
downloadgolang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz
golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip
Adding upstream version 1.22.1.upstream/1.22.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/regexp/exec_test.go')
-rw-r--r--src/regexp/exec_test.go747
1 files changed, 747 insertions, 0 deletions
diff --git a/src/regexp/exec_test.go b/src/regexp/exec_test.go
new file mode 100644
index 0000000..1694230
--- /dev/null
+++ b/src/regexp/exec_test.go
@@ -0,0 +1,747 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package regexp
+
+import (
+ "bufio"
+ "compress/bzip2"
+ "fmt"
+ "internal/testenv"
+ "io"
+ "os"
+ "path/filepath"
+ "regexp/syntax"
+ "strconv"
+ "strings"
+ "testing"
+ "unicode/utf8"
+)
+
+// TestRE2 tests this package's regexp API against test cases
+// considered during RE2's exhaustive tests, which run all possible
+// regexps over a given set of atoms and operators, up to a given
+// complexity, over all possible strings over a given alphabet,
+// up to a given size. Rather than try to link with RE2, we read a
+// log file containing the test cases and the expected matches.
+// The log file, re2-exhaustive.txt, is generated by running 'make log'
+// in the open source RE2 distribution https://github.com/google/re2/.
+//
+// The test file format is a sequence of stanzas like:
+//
+// strings
+// "abc"
+// "123x"
+// regexps
+// "[a-z]+"
+// 0-3;0-3
+// -;-
+// "([0-9])([0-9])([0-9])"
+// -;-
+// -;0-3 0-1 1-2 2-3
+//
+// The stanza begins by defining a set of strings, quoted
+// using Go double-quote syntax, one per line. Then the
+// regexps section gives a sequence of regexps to run on
+// the strings. In the block that follows a regexp, each line
+// gives the semicolon-separated match results of running
+// the regexp on the corresponding string.
+// Each match result is either a single -, meaning no match, or a
+// space-separated sequence of pairs giving the match and
+// submatch indices. An unmatched subexpression formats
+// its pair as a single - (not illustrated above). For now
+// each regexp run produces two match results, one for a
+// “full match” that restricts the regexp to matching the entire
+// string or nothing, and one for a “partial match” that gives
+// the leftmost first match found in the string.
+//
+// Lines beginning with # are comments. Lines beginning with
+// a capital letter are test names printed during RE2's test suite
+// and are echoed into t but otherwise ignored.
+//
+// At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
+// so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
+func TestRE2Search(t *testing.T) {
+ testRE2(t, "testdata/re2-search.txt")
+}
+
+func testRE2(t *testing.T, file string) {
+ f, err := os.Open(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer f.Close()
+ var txt io.Reader
+ if strings.HasSuffix(file, ".bz2") {
+ z := bzip2.NewReader(f)
+ txt = z
+ file = file[:len(file)-len(".bz2")] // for error messages
+ } else {
+ txt = f
+ }
+ lineno := 0
+ scanner := bufio.NewScanner(txt)
+ var (
+ str []string
+ input []string
+ inStrings bool
+ re *Regexp
+ refull *Regexp
+ nfail int
+ ncase int
+ )
+ for lineno := 1; scanner.Scan(); lineno++ {
+ line := scanner.Text()
+ switch {
+ case line == "":
+ t.Fatalf("%s:%d: unexpected blank line", file, lineno)
+ case line[0] == '#':
+ continue
+ case 'A' <= line[0] && line[0] <= 'Z':
+ // Test name.
+ t.Logf("%s\n", line)
+ continue
+ case line == "strings":
+ str = str[:0]
+ inStrings = true
+ case line == "regexps":
+ inStrings = false
+ case line[0] == '"':
+ q, err := strconv.Unquote(line)
+ if err != nil {
+ // Fatal because we'll get out of sync.
+ t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
+ }
+ if inStrings {
+ str = append(str, q)
+ continue
+ }
+ // Is a regexp.
+ if len(input) != 0 {
+ t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
+ }
+ re, err = tryCompile(q)
+ if err != nil {
+ if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
+ // We don't and likely never will support \C; keep going.
+ continue
+ }
+ t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
+ if nfail++; nfail >= 100 {
+ t.Fatalf("stopping after %d errors", nfail)
+ }
+ continue
+ }
+ full := `\A(?:` + q + `)\z`
+ refull, err = tryCompile(full)
+ if err != nil {
+ // Fatal because q worked, so this should always work.
+ t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
+ }
+ input = str
+ case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
+ // A sequence of match results.
+ ncase++
+ if re == nil {
+ // Failed to compile: skip results.
+ continue
+ }
+ if len(input) == 0 {
+ t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
+ }
+ var text string
+ text, input = input[0], input[1:]
+ if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
+ // RE2's \B considers every byte position,
+ // so it sees 'not word boundary' in the
+ // middle of UTF-8 sequences. This package
+ // only considers the positions between runes,
+ // so it disagrees. Skip those cases.
+ continue
+ }
+ res := strings.Split(line, ";")
+ if len(res) != len(run) {
+ t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
+ }
+ for i := range res {
+ have, suffix := run[i](re, refull, text)
+ want := parseResult(t, file, lineno, res[i])
+ if !same(have, want) {
+ t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
+ if nfail++; nfail >= 100 {
+ t.Fatalf("stopping after %d errors", nfail)
+ }
+ continue
+ }
+ b, suffix := match[i](re, refull, text)
+ if b != (want != nil) {
+ t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
+ if nfail++; nfail >= 100 {
+ t.Fatalf("stopping after %d errors", nfail)
+ }
+ continue
+ }
+ }
+
+ default:
+ t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
+ }
+ }
+ if err := scanner.Err(); err != nil {
+ t.Fatalf("%s:%d: %v", file, lineno, err)
+ }
+ if len(input) != 0 {
+ t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
+ }
+ t.Logf("%d cases tested", ncase)
+}
+
+var run = []func(*Regexp, *Regexp, string) ([]int, string){
+ runFull,
+ runPartial,
+ runFullLongest,
+ runPartialLongest,
+}
+
+func runFull(re, refull *Regexp, text string) ([]int, string) {
+ refull.longest = false
+ return refull.FindStringSubmatchIndex(text), "[full]"
+}
+
+func runPartial(re, refull *Regexp, text string) ([]int, string) {
+ re.longest = false
+ return re.FindStringSubmatchIndex(text), ""
+}
+
+func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
+ refull.longest = true
+ return refull.FindStringSubmatchIndex(text), "[full,longest]"
+}
+
+func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
+ re.longest = true
+ return re.FindStringSubmatchIndex(text), "[longest]"
+}
+
+var match = []func(*Regexp, *Regexp, string) (bool, string){
+ matchFull,
+ matchPartial,
+ matchFullLongest,
+ matchPartialLongest,
+}
+
+func matchFull(re, refull *Regexp, text string) (bool, string) {
+ refull.longest = false
+ return refull.MatchString(text), "[full]"
+}
+
+func matchPartial(re, refull *Regexp, text string) (bool, string) {
+ re.longest = false
+ return re.MatchString(text), ""
+}
+
+func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
+ refull.longest = true
+ return refull.MatchString(text), "[full,longest]"
+}
+
+func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
+ re.longest = true
+ return re.MatchString(text), "[longest]"
+}
+
+func isSingleBytes(s string) bool {
+ for _, c := range s {
+ if c >= utf8.RuneSelf {
+ return false
+ }
+ }
+ return true
+}
+
+func tryCompile(s string) (re *Regexp, err error) {
+ // Protect against panic during Compile.
+ defer func() {
+ if r := recover(); r != nil {
+ err = fmt.Errorf("panic: %v", r)
+ }
+ }()
+ return Compile(s)
+}
+
+func parseResult(t *testing.T, file string, lineno int, res string) []int {
+ // A single - indicates no match.
+ if res == "-" {
+ return nil
+ }
+ // Otherwise, a space-separated list of pairs.
+ n := 1
+ for j := 0; j < len(res); j++ {
+ if res[j] == ' ' {
+ n++
+ }
+ }
+ out := make([]int, 2*n)
+ i := 0
+ n = 0
+ for j := 0; j <= len(res); j++ {
+ if j == len(res) || res[j] == ' ' {
+ // Process a single pair. - means no submatch.
+ pair := res[i:j]
+ if pair == "-" {
+ out[n] = -1
+ out[n+1] = -1
+ } else {
+ loStr, hiStr, _ := strings.Cut(pair, "-")
+ lo, err1 := strconv.Atoi(loStr)
+ hi, err2 := strconv.Atoi(hiStr)
+ if err1 != nil || err2 != nil || lo > hi {
+ t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
+ }
+ out[n] = lo
+ out[n+1] = hi
+ }
+ n += 2
+ i = j + 1
+ }
+ }
+ return out
+}
+
+func same(x, y []int) bool {
+ if len(x) != len(y) {
+ return false
+ }
+ for i, xi := range x {
+ if xi != y[i] {
+ return false
+ }
+ }
+ return true
+}
+
+// TestFowler runs this package's regexp API against the
+// POSIX regular expression tests collected by Glenn Fowler
+// at http://www2.research.att.com/~astopen/testregex/testregex.html.
+func TestFowler(t *testing.T) {
+ files, err := filepath.Glob("testdata/*.dat")
+ if err != nil {
+ t.Fatal(err)
+ }
+ for _, file := range files {
+ t.Log(file)
+ testFowler(t, file)
+ }
+}
+
+var notab = MustCompilePOSIX(`[^\t]+`)
+
+func testFowler(t *testing.T, file string) {
+ f, err := os.Open(file)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ defer f.Close()
+ b := bufio.NewReader(f)
+ lineno := 0
+ lastRegexp := ""
+Reading:
+ for {
+ lineno++
+ line, err := b.ReadString('\n')
+ if err != nil {
+ if err != io.EOF {
+ t.Errorf("%s:%d: %v", file, lineno, err)
+ }
+ break Reading
+ }
+
+ // http://www2.research.att.com/~astopen/man/man1/testregex.html
+ //
+ // INPUT FORMAT
+ // Input lines may be blank, a comment beginning with #, or a test
+ // specification. A specification is five fields separated by one
+ // or more tabs. NULL denotes the empty string and NIL denotes the
+ // 0 pointer.
+ if line[0] == '#' || line[0] == '\n' {
+ continue Reading
+ }
+ line = line[:len(line)-1]
+ field := notab.FindAllString(line, -1)
+ for i, f := range field {
+ if f == "NULL" {
+ field[i] = ""
+ }
+ if f == "NIL" {
+ t.Logf("%s:%d: skip: %s", file, lineno, line)
+ continue Reading
+ }
+ }
+ if len(field) == 0 {
+ continue Reading
+ }
+
+ // Field 1: the regex(3) flags to apply, one character per REG_feature
+ // flag. The test is skipped if REG_feature is not supported by the
+ // implementation. If the first character is not [BEASKLP] then the
+ // specification is a global control line. One or more of [BEASKLP] may be
+ // specified; the test will be repeated for each mode.
+ //
+ // B basic BRE (grep, ed, sed)
+ // E REG_EXTENDED ERE (egrep)
+ // A REG_AUGMENTED ARE (egrep with negation)
+ // S REG_SHELL SRE (sh glob)
+ // K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
+ // L REG_LITERAL LRE (fgrep)
+ //
+ // a REG_LEFT|REG_RIGHT implicit ^...$
+ // b REG_NOTBOL lhs does not match ^
+ // c REG_COMMENT ignore space and #...\n
+ // d REG_SHELL_DOT explicit leading . match
+ // e REG_NOTEOL rhs does not match $
+ // f REG_MULTIPLE multiple \n separated patterns
+ // g FNM_LEADING_DIR testfnmatch only -- match until /
+ // h REG_MULTIREF multiple digit backref
+ // i REG_ICASE ignore case
+ // j REG_SPAN . matches \n
+ // k REG_ESCAPE \ to escape [...] delimiter
+ // l REG_LEFT implicit ^...
+ // m REG_MINIMAL minimal match
+ // n REG_NEWLINE explicit \n match
+ // o REG_ENCLOSED (|&) magic inside [@|&](...)
+ // p REG_SHELL_PATH explicit / match
+ // q REG_DELIMITED delimited pattern
+ // r REG_RIGHT implicit ...$
+ // s REG_SHELL_ESCAPED \ not special
+ // t REG_MUSTDELIM all delimiters must be specified
+ // u standard unspecified behavior -- errors not counted
+ // v REG_CLASS_ESCAPE \ special inside [...]
+ // w REG_NOSUB no subexpression match array
+ // x REG_LENIENT let some errors slide
+ // y REG_LEFT regexec() implicit ^...
+ // z REG_NULL NULL subexpressions ok
+ // $ expand C \c escapes in fields 2 and 3
+ // / field 2 is a regsubcomp() expression
+ // = field 3 is a regdecomp() expression
+ //
+ // Field 1 control lines:
+ //
+ // C set LC_COLLATE and LC_CTYPE to locale in field 2
+ //
+ // ?test ... output field 5 if passed and != EXPECTED, silent otherwise
+ // &test ... output field 5 if current and previous passed
+ // |test ... output field 5 if current passed and previous failed
+ // ; ... output field 2 if previous failed
+ // {test ... skip if failed until }
+ // } end of skip
+ //
+ // : comment comment copied as output NOTE
+ // :comment:test :comment: ignored
+ // N[OTE] comment comment copied as output NOTE
+ // T[EST] comment comment
+ //
+ // number use number for nmatch (20 by default)
+ flag := field[0]
+ switch flag[0] {
+ case '?', '&', '|', ';', '{', '}':
+ // Ignore all the control operators.
+ // Just run everything.
+ flag = flag[1:]
+ if flag == "" {
+ continue Reading
+ }
+ case ':':
+ var ok bool
+ if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
+ t.Logf("skip: %s", line)
+ continue Reading
+ }
+ case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ t.Logf("skip: %s", line)
+ continue Reading
+ }
+
+ // Can check field count now that we've handled the myriad comment formats.
+ if len(field) < 4 {
+ t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
+ continue Reading
+ }
+
+ // Expand C escapes (a.k.a. Go escapes).
+ if strings.Contains(flag, "$") {
+ f := `"` + field[1] + `"`
+ if field[1], err = strconv.Unquote(f); err != nil {
+ t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
+ }
+ f = `"` + field[2] + `"`
+ if field[2], err = strconv.Unquote(f); err != nil {
+ t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
+ }
+ }
+
+ // Field 2: the regular expression pattern; SAME uses the pattern from
+ // the previous specification.
+ //
+ if field[1] == "SAME" {
+ field[1] = lastRegexp
+ }
+ lastRegexp = field[1]
+
+ // Field 3: the string to match.
+ text := field[2]
+
+ // Field 4: the test outcome...
+ ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
+ if !ok {
+ t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
+ continue Reading
+ }
+
+ // Field 5: optional comment appended to the report.
+
+ Testing:
+ // Run test once for each specified capital letter mode that we support.
+ for _, c := range flag {
+ pattern := field[1]
+ syn := syntax.POSIX | syntax.ClassNL
+ switch c {
+ default:
+ continue Testing
+ case 'E':
+ // extended regexp (what we support)
+ case 'L':
+ // literal
+ pattern = QuoteMeta(pattern)
+ }
+
+ for _, c := range flag {
+ switch c {
+ case 'i':
+ syn |= syntax.FoldCase
+ }
+ }
+
+ re, err := compile(pattern, syn, true)
+ if err != nil {
+ if shouldCompile {
+ t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
+ }
+ continue Testing
+ }
+ if !shouldCompile {
+ t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
+ continue Testing
+ }
+ match := re.MatchString(text)
+ if match != shouldMatch {
+ t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
+ continue Testing
+ }
+ have := re.FindStringSubmatchIndex(text)
+ if (len(have) > 0) != match {
+ t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
+ continue Testing
+ }
+ if len(have) > len(pos) {
+ have = have[:len(pos)]
+ }
+ if !same(have, pos) {
+ t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
+ }
+ }
+ }
+}
+
+func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
+ // Field 4: the test outcome. This is either one of the posix error
+ // codes (with REG_ omitted) or the match array, a list of (m,n)
+ // entries with m and n being first and last+1 positions in the
+ // field 3 string, or NULL if REG_NOSUB is in effect and success
+ // is expected. BADPAT is acceptable in place of any regcomp(3)
+ // error code. The match[] array is initialized to (-2,-2) before
+ // each test. All array elements from 0 to nmatch-1 must be specified
+ // in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
+ // Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
+ // matched (?{...}) expression, where x is the text enclosed by {...},
+ // o is the expression ordinal counting from 1, and n is the length of
+ // the unmatched portion of the subject string. If x starts with a
+ // number then that is the return value of re_execf(), otherwise 0 is
+ // returned.
+ switch {
+ case s == "":
+ // Match with no position information.
+ ok = true
+ compiled = true
+ matched = true
+ return
+ case s == "NOMATCH":
+ // Match failure.
+ ok = true
+ compiled = true
+ matched = false
+ return
+ case 'A' <= s[0] && s[0] <= 'Z':
+ // All the other error codes are compile errors.
+ ok = true
+ compiled = false
+ return
+ }
+ compiled = true
+
+ var x []int
+ for s != "" {
+ var end byte = ')'
+ if len(x)%2 == 0 {
+ if s[0] != '(' {
+ ok = false
+ return
+ }
+ s = s[1:]
+ end = ','
+ }
+ i := 0
+ for i < len(s) && s[i] != end {
+ i++
+ }
+ if i == 0 || i == len(s) {
+ ok = false
+ return
+ }
+ var v = -1
+ var err error
+ if s[:i] != "?" {
+ v, err = strconv.Atoi(s[:i])
+ if err != nil {
+ ok = false
+ return
+ }
+ }
+ x = append(x, v)
+ s = s[i+1:]
+ }
+ if len(x)%2 != 0 {
+ ok = false
+ return
+ }
+ ok = true
+ matched = true
+ pos = x
+ return
+}
+
+var text []byte
+
+func makeText(n int) []byte {
+ if len(text) >= n {
+ return text[:n]
+ }
+ text = make([]byte, n)
+ x := ^uint32(0)
+ for i := range text {
+ x += x
+ x ^= 1
+ if int32(x) < 0 {
+ x ^= 0x88888eef
+ }
+ if x%31 == 0 {
+ text[i] = '\n'
+ } else {
+ text[i] = byte(x%(0x7E+1-0x20) + 0x20)
+ }
+ }
+ return text
+}
+
+func BenchmarkMatch(b *testing.B) {
+ isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
+
+ for _, data := range benchData {
+ r := MustCompile(data.re)
+ for _, size := range benchSizes {
+ if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
+ continue
+ }
+ t := makeText(size.n)
+ b.Run(data.name+"/"+size.name, func(b *testing.B) {
+ b.SetBytes(int64(size.n))
+ for i := 0; i < b.N; i++ {
+ if r.Match(t) {
+ b.Fatal("match!")
+ }
+ }
+ })
+ }
+ }
+}
+
+func BenchmarkMatch_onepass_regex(b *testing.B) {
+ isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
+ r := MustCompile(`(?s)\A.*\z`)
+ if r.onepass == nil {
+ b.Fatalf("want onepass regex, but %q is not onepass", r)
+ }
+ for _, size := range benchSizes {
+ if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
+ continue
+ }
+ t := makeText(size.n)
+ b.Run(size.name, func(b *testing.B) {
+ b.SetBytes(int64(size.n))
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ if !r.Match(t) {
+ b.Fatal("not match!")
+ }
+ }
+ })
+ }
+}
+
+var benchData = []struct{ name, re string }{
+ {"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
+ {"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
+ {"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
+ {"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
+ {"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
+ {"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
+}
+
+var benchSizes = []struct {
+ name string
+ n int
+}{
+ {"16", 16},
+ {"32", 32},
+ {"1K", 1 << 10},
+ {"32K", 32 << 10},
+ {"1M", 1 << 20},
+ {"32M", 32 << 20},
+}
+
+func TestLongest(t *testing.T) {
+ re, err := Compile(`a(|b)`)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if g, w := re.FindString("ab"), "a"; g != w {
+ t.Errorf("first match was %q, want %q", g, w)
+ }
+ re.Longest()
+ if g, w := re.FindString("ab"), "ab"; g != w {
+ t.Errorf("longest match was %q, want %q", g, w)
+ }
+}
+
+// TestProgramTooLongForBacktrack tests that a regex which is too long
+// for the backtracker still executes properly.
+func TestProgramTooLongForBacktrack(t *testing.T) {
+ longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
+ if !longRegex.MatchString("two") {
+ t.Errorf("longRegex.MatchString(\"two\") was false, want true")
+ }
+ if longRegex.MatchString("xxx") {
+ t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
+ }
+}