diff options
Diffstat (limited to 'src/strings/replace_test.go')
-rw-r--r-- | src/strings/replace_test.go | 583 |
1 files changed, 583 insertions, 0 deletions
diff --git a/src/strings/replace_test.go b/src/strings/replace_test.go new file mode 100644 index 0000000..34b5bad --- /dev/null +++ b/src/strings/replace_test.go @@ -0,0 +1,583 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings_test + +import ( + "bytes" + "fmt" + . "strings" + "testing" +) + +var htmlEscaper = NewReplacer( + "&", "&", + "<", "<", + ">", ">", + `"`, """, + "'", "'", +) + +var htmlUnescaper = NewReplacer( + "&", "&", + "<", "<", + ">", ">", + """, `"`, + "'", "'", +) + +// The http package's old HTML escaping function. +func oldHTMLEscape(s string) string { + s = Replace(s, "&", "&", -1) + s = Replace(s, "<", "<", -1) + s = Replace(s, ">", ">", -1) + s = Replace(s, `"`, """, -1) + s = Replace(s, "'", "'", -1) + return s +} + +var capitalLetters = NewReplacer("a", "A", "b", "B") + +// TestReplacer tests the replacer implementations. +func TestReplacer(t *testing.T) { + type testCase struct { + r *Replacer + in, out string + } + var testCases []testCase + + // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8. + str := func(b byte) string { + return string([]byte{b}) + } + var s []string + + // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00". + s = nil + for i := 0; i < 256; i++ { + s = append(s, str(byte(i)), str(byte(i+1))) + } + inc := NewReplacer(s...) + + // Test cases with 1-byte old strings, 1-byte new strings. + testCases = append(testCases, + testCase{capitalLetters, "brad", "BrAd"}, + testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, + testCase{capitalLetters, "", ""}, + + testCase{inc, "brad", "csbe"}, + testCase{inc, "\x00\xff", "\x01\x00"}, + testCase{inc, "", ""}, + + testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"}, + ) + + // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ... + s = nil + for i := 0; i < 256; i++ { + n := i + 1 - 'a' + if n < 1 { + n = 1 + } + s = append(s, str(byte(i)), Repeat(str(byte(i)), n)) + } + repeat := NewReplacer(s...) + + // Test cases with 1-byte old strings, variable length new strings. + testCases = append(testCases, + testCase{htmlEscaper, "No changes", "No changes"}, + testCase{htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, + testCase{htmlEscaper, "&&&", "&&&"}, + testCase{htmlEscaper, "", ""}, + + testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"}, + testCase{repeat, "abba", "abbbba"}, + testCase{repeat, "", ""}, + + testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"}, + ) + + // The remaining test cases have variable length old strings. + + testCases = append(testCases, + testCase{htmlUnescaper, "&amp;", "&"}, + testCase{htmlUnescaper, "<b>HTML's neat</b>", "<b>HTML's neat</b>"}, + testCase{htmlUnescaper, "", ""}, + + testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"}, + + testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"}, + + testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"}, + ) + + // gen1 has multiple old strings of variable length. There is no + // overall non-empty common prefix, but some pairwise common prefixes. + gen1 := NewReplacer( + "aaa", "3[aaa]", + "aa", "2[aa]", + "a", "1[a]", + "i", "i", + "longerst", "most long", + "longer", "medium", + "long", "short", + "xx", "xx", + "x", "X", + "X", "Y", + "Y", "Z", + ) + testCases = append(testCases, + testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"}, + testCase{gen1, "long, longerst, longer", "short, most long, medium"}, + testCase{gen1, "xxxxx", "xxxxX"}, + testCase{gen1, "XiX", "YiY"}, + testCase{gen1, "", ""}, + ) + + // gen2 has multiple old strings with no pairwise common prefix. + gen2 := NewReplacer( + "roses", "red", + "violets", "blue", + "sugar", "sweet", + ) + testCases = append(testCases, + testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."}, + testCase{gen2, "", ""}, + ) + + // gen3 has multiple old strings with an overall common prefix. + gen3 := NewReplacer( + "abracadabra", "poof", + "abracadabrakazam", "splat", + "abraham", "lincoln", + "abrasion", "scrape", + "abraham", "isaac", + ) + testCases = append(testCases, + testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"}, + testCase{gen3, "abrasion abracad", "scrape abracad"}, + testCase{gen3, "abba abram abrasive", "abba abram abrasive"}, + testCase{gen3, "", ""}, + ) + + // foo{1,2,3,4} have multiple old strings with an overall common prefix + // and 1- or 2- byte extensions from the common prefix. + foo1 := NewReplacer( + "foo1", "A", + "foo2", "B", + "foo3", "C", + ) + foo2 := NewReplacer( + "foo1", "A", + "foo2", "B", + "foo31", "C", + "foo32", "D", + ) + foo3 := NewReplacer( + "foo11", "A", + "foo12", "B", + "foo31", "C", + "foo32", "D", + ) + foo4 := NewReplacer( + "foo12", "B", + "foo32", "D", + ) + testCases = append(testCases, + testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"}, + testCase{foo1, "", ""}, + + testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"}, + testCase{foo2, "", ""}, + + testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"}, + testCase{foo3, "", ""}, + + testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"}, + testCase{foo4, "", ""}, + ) + + // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things. + allBytes := make([]byte, 256) + for i := range allBytes { + allBytes[i] = byte(i) + } + allString := string(allBytes) + genAll := NewReplacer( + allString, "[all]", + "\xff", "[ff]", + "\x00", "[00]", + ) + testCases = append(testCases, + testCase{genAll, allString, "[all]"}, + testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"}, + testCase{genAll, "", ""}, + ) + + // Test cases with empty old strings. + + blankToX1 := NewReplacer("", "X") + blankToX2 := NewReplacer("", "X", "", "") + blankHighPriority := NewReplacer("", "X", "o", "O") + blankLowPriority := NewReplacer("o", "O", "", "X") + blankNoOp1 := NewReplacer("", "") + blankNoOp2 := NewReplacer("", "", "", "A") + blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z") + testCases = append(testCases, + testCase{blankToX1, "foo", "XfXoXoX"}, + testCase{blankToX1, "", "X"}, + + testCase{blankToX2, "foo", "XfXoXoX"}, + testCase{blankToX2, "", "X"}, + + testCase{blankHighPriority, "oo", "XOXOX"}, + testCase{blankHighPriority, "ii", "XiXiX"}, + testCase{blankHighPriority, "oiio", "XOXiXiXOX"}, + testCase{blankHighPriority, "iooi", "XiXOXOXiX"}, + testCase{blankHighPriority, "", "X"}, + + testCase{blankLowPriority, "oo", "OOX"}, + testCase{blankLowPriority, "ii", "XiXiX"}, + testCase{blankLowPriority, "oiio", "OXiXiOX"}, + testCase{blankLowPriority, "iooi", "XiOOXiX"}, + testCase{blankLowPriority, "", "X"}, + + testCase{blankNoOp1, "foo", "foo"}, + testCase{blankNoOp1, "", ""}, + + testCase{blankNoOp2, "foo", "foo"}, + testCase{blankNoOp2, "", ""}, + + testCase{blankFoo, "foobarfoobaz", "XRXZX"}, + testCase{blankFoo, "foobar-foobaz", "XRX-XZX"}, + testCase{blankFoo, "", "X"}, + ) + + // single string replacer + + abcMatcher := NewReplacer("abc", "[match]") + + testCases = append(testCases, + testCase{abcMatcher, "", ""}, + testCase{abcMatcher, "ab", "ab"}, + testCase{abcMatcher, "abc", "[match]"}, + testCase{abcMatcher, "abcd", "[match]d"}, + testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"}, + ) + + // Issue 6659 cases (more single string replacer) + + noHello := NewReplacer("Hello", "") + testCases = append(testCases, + testCase{noHello, "Hello", ""}, + testCase{noHello, "Hellox", "x"}, + testCase{noHello, "xHello", "x"}, + testCase{noHello, "xHellox", "xx"}, + ) + + // No-arg test cases. + + nop := NewReplacer() + testCases = append(testCases, + testCase{nop, "abc", "abc"}, + testCase{nop, "", ""}, + ) + + // Run the test cases. + + for i, tc := range testCases { + if s := tc.r.Replace(tc.in); s != tc.out { + t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out) + } + var buf bytes.Buffer + n, err := tc.r.WriteString(&buf, tc.in) + if err != nil { + t.Errorf("%d. WriteString: %v", i, err) + continue + } + got := buf.String() + if got != tc.out { + t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out) + continue + } + if n != len(tc.out) { + t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)", + i, tc.in, n, len(tc.out), tc.out) + } + } +} + +var algorithmTestCases = []struct { + r *Replacer + want string +}{ + {capitalLetters, "*strings.byteReplacer"}, + {htmlEscaper, "*strings.byteStringReplacer"}, + {NewReplacer("12", "123"), "*strings.singleStringReplacer"}, + {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, + {NewReplacer("", "X"), "*strings.genericReplacer"}, + {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"}, +} + +// TestPickAlgorithm tests that NewReplacer picks the correct algorithm. +func TestPickAlgorithm(t *testing.T) { + for i, tc := range algorithmTestCases { + got := fmt.Sprintf("%T", tc.r.Replacer()) + if got != tc.want { + t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want) + } + } +} + +type errWriter struct{} + +func (errWriter) Write(p []byte) (n int, err error) { + return 0, fmt.Errorf("unwritable") +} + +// TestWriteStringError tests that WriteString returns an error +// received from the underlying io.Writer. +func TestWriteStringError(t *testing.T) { + for i, tc := range algorithmTestCases { + n, err := tc.r.WriteString(errWriter{}, "abc") + if n != 0 || err == nil || err.Error() != "unwritable" { + t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err) + } + } +} + +// TestGenericTrieBuilding verifies the structure of the generated trie. There +// is one node per line, and the key ending with the current line is in the +// trie if it ends with a "+". +func TestGenericTrieBuilding(t *testing.T) { + testCases := []struct{ in, out string }{ + {"abc;abdef;abdefgh;xx;xy;z", `- + a- + .b- + ..c+ + ..d- + ...ef+ + .....gh+ + x- + .x+ + .y+ + z+ + `}, + {"abracadabra;abracadabrakazam;abraham;abrasion", `- + a- + .bra- + ....c- + .....adabra+ + ...........kazam+ + ....h- + .....am+ + ....s- + .....ion+ + `}, + {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `- + X+ + Y+ + a+ + .a+ + ..a+ + i+ + l- + .ong+ + ....er+ + ......st+ + x+ + .x+ + `}, + {"foo;;foo;foo1", `+ + f- + .oo+ + ...1+ + `}, + } + + for _, tc := range testCases { + keys := Split(tc.in, ";") + args := make([]string, len(keys)*2) + for i, key := range keys { + args[i*2] = key + } + + got := NewReplacer(args...).PrintTrie() + // Remove tabs from tc.out + wantbuf := make([]byte, 0, len(tc.out)) + for i := 0; i < len(tc.out); i++ { + if tc.out[i] != '\t' { + wantbuf = append(wantbuf, tc.out[i]) + } + } + want := string(wantbuf) + + if got != want { + t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want) + } + } +} + +func BenchmarkGenericNoMatch(b *testing.B) { + str := Repeat("A", 100) + Repeat("B", 100) + generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic + for i := 0; i < b.N; i++ { + generic.Replace(str) + } +} + +func BenchmarkGenericMatch1(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + generic := NewReplacer("a", "A", "b", "B", "12", "123") + for i := 0; i < b.N; i++ { + generic.Replace(str) + } +} + +func BenchmarkGenericMatch2(b *testing.B) { + str := Repeat("It's <b>HTML</b>!", 100) + for i := 0; i < b.N; i++ { + htmlUnescaper.Replace(str) + } +} + +func benchmarkSingleString(b *testing.B, pattern, text string) { + r := NewReplacer(pattern, "[match]") + b.SetBytes(int64(len(text))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + r.Replace(text) + } +} + +func BenchmarkSingleMaxSkipping(b *testing.B) { + benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000)) +} + +func BenchmarkSingleLongSuffixFail(b *testing.B) { + benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002)) +} + +func BenchmarkSingleMatch(b *testing.B) { + benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000)) +} + +func BenchmarkByteByteNoMatch(b *testing.B) { + str := Repeat("A", 100) + Repeat("B", 100) + for i := 0; i < b.N; i++ { + capitalLetters.Replace(str) + } +} + +func BenchmarkByteByteMatch(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + for i := 0; i < b.N; i++ { + capitalLetters.Replace(str) + } +} + +func BenchmarkByteStringMatch(b *testing.B) { + str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeNew(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeOld(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + oldHTMLEscape(str) + } +} + +func BenchmarkByteStringReplacerWriteString(b *testing.B) { + str := Repeat("I <3 to escape HTML & other text too.", 100) + buf := new(bytes.Buffer) + for i := 0; i < b.N; i++ { + htmlEscaper.WriteString(buf, str) + buf.Reset() + } +} + +func BenchmarkByteReplacerWriteString(b *testing.B) { + str := Repeat("abcdefghijklmnopqrstuvwxyz", 100) + buf := new(bytes.Buffer) + for i := 0; i < b.N; i++ { + capitalLetters.WriteString(buf, str) + buf.Reset() + } +} + +// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. +func BenchmarkByteByteReplaces(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + for i := 0; i < b.N; i++ { + Replace(Replace(str, "a", "A", -1), "b", "B", -1) + } +} + +// BenchmarkByteByteMap compares byteByteImpl against Map. +func BenchmarkByteByteMap(b *testing.B) { + str := Repeat("a", 100) + Repeat("b", 100) + fn := func(r rune) rune { + switch r { + case 'a': + return 'A' + case 'b': + return 'B' + } + return r + } + for i := 0; i < b.N; i++ { + Map(fn, str) + } +} + +var mapdata = []struct{ name, data string }{ + {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"}, + {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"}, +} + +func BenchmarkMap(b *testing.B) { + mapidentity := func(r rune) rune { + return r + } + + b.Run("identity", func(b *testing.B) { + for _, md := range mapdata { + b.Run(md.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + Map(mapidentity, md.data) + } + }) + } + }) + + mapchange := func(r rune) rune { + if 'a' <= r && r <= 'z' { + return r + 'A' - 'a' + } + if 'α' <= r && r <= 'ω' { + return r + 'Α' - 'α' + } + return r + } + + b.Run("change", func(b *testing.B) { + for _, md := range mapdata { + b.Run(md.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + Map(mapchange, md.data) + } + }) + } + }) +} |