summaryrefslogtreecommitdiffstats
path: root/src/strings/replace_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/strings/replace_test.go')
-rw-r--r--src/strings/replace_test.go583
1 files changed, 583 insertions, 0 deletions
diff --git a/src/strings/replace_test.go b/src/strings/replace_test.go
new file mode 100644
index 0000000..34b5bad
--- /dev/null
+++ b/src/strings/replace_test.go
@@ -0,0 +1,583 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "bytes"
+ "fmt"
+ . "strings"
+ "testing"
+)
+
+// htmlEscaper escapes the five HTML special characters as named entities.
+// Every old string is a single byte while the replacements vary in length.
+// "&" must map to "&amp;": mapping it to itself would leave ampersands
+// unescaped and would not round-trip through htmlUnescaper.
+var htmlEscaper = NewReplacer(
+	"&", "&amp;",
+	"<", "&lt;",
+	">", "&gt;",
+	`"`, "&quot;",
+	"'", "&apos;",
+)
+
+// htmlUnescaper maps the five named entities "&amp;", "&lt;", "&gt;",
+// "&quot;" and "&apos;" back to the single characters they stand for.
+// Its old strings have different lengths.
+var htmlUnescaper = NewReplacer(
+ "&amp;", "&",
+ "&lt;", "<",
+ "&gt;", ">",
+ "&quot;", `"`,
+ "&apos;", "'",
+)
+
+// oldHTMLEscape is the http package's old HTML escaping function. It makes
+// one full Replace pass over s per special character. "&" is handled first
+// because every later replacement itself introduces an ampersand.
+func oldHTMLEscape(s string) string {
+	passes := [...][2]string{
+		{"&", "&amp;"},
+		{"<", "&lt;"},
+		{">", "&gt;"},
+		{`"`, "&quot;"},
+		{"'", "&apos;"},
+	}
+	for _, pass := range passes {
+		s = Replace(s, pass[0], pass[1], -1)
+	}
+	return s
+}
+
+// capitalLetters upper-cases "a" and "b" and nothing else; every old and
+// new string is exactly one byte long.
+var capitalLetters = NewReplacer("a", "A", "b", "B")
+
+// TestReplacer tests the replacer implementations.
+//
+// It accumulates a table of (replacer, input, expected output) cases grouped
+// by the shape of the old/new strings, then runs every case through both
+// Replace and WriteString. Failures are reported by the case's index in the
+// table, so the order in which cases are appended matters for diagnostics.
+func TestReplacer(t *testing.T) {
+ type testCase struct {
+ r *Replacer
+ in, out string
+ }
+ var testCases []testCase
+
+ // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
+ str := func(b byte) string {
+ return string([]byte{b})
+ }
+ var s []string
+
+ // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
+ s = nil
+ for i := 0; i < 256; i++ {
+ s = append(s, str(byte(i)), str(byte(i+1)))
+ }
+ inc := NewReplacer(s...)
+
+ // Test cases with 1-byte old strings, 1-byte new strings.
+ // The last case repeats the old string "a"; the expected output shows
+ // that the first pair given for it is the one applied.
+ testCases = append(testCases,
+ testCase{capitalLetters, "brad", "BrAd"},
+ testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+ testCase{capitalLetters, "", ""},
+
+ testCase{inc, "brad", "csbe"},
+ testCase{inc, "\x00\xff", "\x01\x00"},
+ testCase{inc, "", ""},
+
+ testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
+ )
+
+ // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
+ // Bytes below 'a' are clamped to a single copy (n = 1).
+ s = nil
+ for i := 0; i < 256; i++ {
+ n := i + 1 - 'a'
+ if n < 1 {
+ n = 1
+ }
+ s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
+ }
+ repeat := NewReplacer(s...)
+
+ // Test cases with 1-byte old strings, variable length new strings.
+ testCases = append(testCases,
+ testCase{htmlEscaper, "No changes", "No changes"},
+ testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
+ testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+ testCase{htmlEscaper, "", ""},
+
+ testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
+ testCase{repeat, "abba", "abbbba"},
+ testCase{repeat, "", ""},
+
+ testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
+ )
+
+ // The remaining test cases have variable length old strings.
+ // The last two cases use the same three pairs in opposite order and
+ // expect different results: an earlier pair wins over a later one even
+ // when the later old string is longer.
+
+ testCases = append(testCases,
+ testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
+ testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
+ testCase{htmlUnescaper, "", ""},
+
+ testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
+
+ testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
+
+ testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
+ )
+
+ // gen1 has multiple old strings of variable length. There is no
+ // overall non-empty common prefix, but some pairwise common prefixes.
+ gen1 := NewReplacer(
+ "aaa", "3[aaa]",
+ "aa", "2[aa]",
+ "a", "1[a]",
+ "i", "i",
+ "longerst", "most long",
+ "longer", "medium",
+ "long", "short",
+ "xx", "xx",
+ "x", "X",
+ "X", "Y",
+ "Y", "Z",
+ )
+ testCases = append(testCases,
+ testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
+ testCase{gen1, "long, longerst, longer", "short, most long, medium"},
+ testCase{gen1, "xxxxx", "xxxxX"},
+ testCase{gen1, "XiX", "YiY"},
+ testCase{gen1, "", ""},
+ )
+
+ // gen2 has multiple old strings with no pairwise common prefix.
+ gen2 := NewReplacer(
+ "roses", "red",
+ "violets", "blue",
+ "sugar", "sweet",
+ )
+ testCases = append(testCases,
+ testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
+ testCase{gen2, "", ""},
+ )
+
+ // gen3 has multiple old strings with an overall common prefix.
+ // "abraham" is listed twice; the expectations use the first mapping.
+ gen3 := NewReplacer(
+ "abracadabra", "poof",
+ "abracadabrakazam", "splat",
+ "abraham", "lincoln",
+ "abrasion", "scrape",
+ "abraham", "isaac",
+ )
+ testCases = append(testCases,
+ testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
+ testCase{gen3, "abrasion abracad", "scrape abracad"},
+ testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
+ testCase{gen3, "", ""},
+ )
+
+ // foo{1,2,3,4} have multiple old strings with an overall common prefix
+ // and 1- or 2- byte extensions from the common prefix.
+ foo1 := NewReplacer(
+ "foo1", "A",
+ "foo2", "B",
+ "foo3", "C",
+ )
+ foo2 := NewReplacer(
+ "foo1", "A",
+ "foo2", "B",
+ "foo31", "C",
+ "foo32", "D",
+ )
+ foo3 := NewReplacer(
+ "foo11", "A",
+ "foo12", "B",
+ "foo31", "C",
+ "foo32", "D",
+ )
+ foo4 := NewReplacer(
+ "foo12", "B",
+ "foo32", "D",
+ )
+ testCases = append(testCases,
+ testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
+ testCase{foo1, "", ""},
+
+ testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
+ testCase{foo2, "", ""},
+
+ testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
+ testCase{foo3, "", ""},
+
+ testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
+ testCase{foo4, "", ""},
+ )
+
+ // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
+ allBytes := make([]byte, 256)
+ for i := range allBytes {
+ allBytes[i] = byte(i)
+ }
+ allString := string(allBytes)
+ genAll := NewReplacer(
+ allString, "[all]",
+ "\xff", "[ff]",
+ "\x00", "[00]",
+ )
+ testCases = append(testCases,
+ testCase{genAll, allString, "[all]"},
+ testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
+ testCase{genAll, "", ""},
+ )
+
+ // Test cases with empty old strings.
+ // Per the expectations below, an empty old string matches before every
+ // byte and at the end of the input; where it sits relative to the other
+ // pairs decides which replacement is applied at a given position.
+
+ blankToX1 := NewReplacer("", "X")
+ blankToX2 := NewReplacer("", "X", "", "")
+ blankHighPriority := NewReplacer("", "X", "o", "O")
+ blankLowPriority := NewReplacer("o", "O", "", "X")
+ blankNoOp1 := NewReplacer("", "")
+ blankNoOp2 := NewReplacer("", "", "", "A")
+ blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
+ testCases = append(testCases,
+ testCase{blankToX1, "foo", "XfXoXoX"},
+ testCase{blankToX1, "", "X"},
+
+ testCase{blankToX2, "foo", "XfXoXoX"},
+ testCase{blankToX2, "", "X"},
+
+ testCase{blankHighPriority, "oo", "XOXOX"},
+ testCase{blankHighPriority, "ii", "XiXiX"},
+ testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
+ testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
+ testCase{blankHighPriority, "", "X"},
+
+ testCase{blankLowPriority, "oo", "OOX"},
+ testCase{blankLowPriority, "ii", "XiXiX"},
+ testCase{blankLowPriority, "oiio", "OXiXiOX"},
+ testCase{blankLowPriority, "iooi", "XiOOXiX"},
+ testCase{blankLowPriority, "", "X"},
+
+ testCase{blankNoOp1, "foo", "foo"},
+ testCase{blankNoOp1, "", ""},
+
+ testCase{blankNoOp2, "foo", "foo"},
+ testCase{blankNoOp2, "", ""},
+
+ testCase{blankFoo, "foobarfoobaz", "XRXZX"},
+ testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
+ testCase{blankFoo, "", "X"},
+ )
+
+ // single string replacer
+
+ abcMatcher := NewReplacer("abc", "[match]")
+
+ testCases = append(testCases,
+ testCase{abcMatcher, "", ""},
+ testCase{abcMatcher, "ab", "ab"},
+ testCase{abcMatcher, "abc", "[match]"},
+ testCase{abcMatcher, "abcd", "[match]d"},
+ testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
+ )
+
+ // Issue 6659 cases (more single string replacer)
+
+ noHello := NewReplacer("Hello", "")
+ testCases = append(testCases,
+ testCase{noHello, "Hello", ""},
+ testCase{noHello, "Hellox", "x"},
+ testCase{noHello, "xHello", "x"},
+ testCase{noHello, "xHellox", "xx"},
+ )
+
+ // No-arg test cases.
+
+ nop := NewReplacer()
+ testCases = append(testCases,
+ testCase{nop, "abc", "abc"},
+ testCase{nop, "", ""},
+ )
+
+ // Run the test cases.
+ // Each case is checked twice: once through Replace and once through
+ // WriteString into a fresh buffer. For WriteString the written bytes
+ // must match and the reported count must equal len(tc.out).
+
+ for i, tc := range testCases {
+ if s := tc.r.Replace(tc.in); s != tc.out {
+ t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
+ }
+ var buf bytes.Buffer
+ n, err := tc.r.WriteString(&buf, tc.in)
+ if err != nil {
+ t.Errorf("%d. WriteString: %v", i, err)
+ continue
+ }
+ got := buf.String()
+ if got != tc.out {
+ t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
+ continue
+ }
+ if n != len(tc.out) {
+ t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
+ i, tc.in, n, len(tc.out), tc.out)
+ }
+ }
+}
+
+// algorithmTestCases pairs a Replacer with the %T-formatted name of the
+// implementation type it is expected to use. TestPickAlgorithm checks the
+// names; TestWriteStringError reuses the replacers to cover each
+// implementation listed here.
+var algorithmTestCases = []struct {
+ r *Replacer
+ want string
+}{
+ {capitalLetters, "*strings.byteReplacer"},
+ {htmlEscaper, "*strings.byteStringReplacer"},
+ {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
+ {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
+ {NewReplacer("", "X"), "*strings.genericReplacer"},
+ {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
+}
+
+// TestPickAlgorithm checks, for every entry of algorithmTestCases, that the
+// dynamic type behind the Replacer (formatted with %T) is the expected one.
+func TestPickAlgorithm(t *testing.T) {
+	for idx := range algorithmTestCases {
+		c := &algorithmTestCases[idx]
+		if name := fmt.Sprintf("%T", c.r.Replacer()); name != c.want {
+			t.Errorf("%d. algorithm = %s, want %s", idx, name, c.want)
+		}
+	}
+}
+
+// errWriter is a writer whose Write always fails.
+type errWriter struct{}
+
+// Write ignores p, reports zero bytes written and returns an error whose
+// message is "unwritable".
+func (errWriter) Write(p []byte) (n int, err error) {
+ return 0, fmt.Errorf("unwritable")
+}
+
+// TestWriteStringError tests that WriteString returns an error
+// received from the underlying io.Writer.
+func TestWriteStringError(t *testing.T) {
+ for i, tc := range algorithmTestCases {
+ n, err := tc.r.WriteString(errWriter{}, "abc")
+ if n != 0 || err == nil || err.Error() != "unwritable" {
+ t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
+ }
+ }
+}
+
+// TestGenericTrieBuilding verifies the structure of the generated trie. There
+// is one node per line, and the key ending with the current line is in the
+// trie if it ends with a "+".
+//
+// Each case's in field is a ";"-separated list of old strings; out is the
+// expected PrintTrie dump, written indented for readability and stripped of
+// tab characters before comparison.
+//
+// NOTE(review): in this rendering the indentation inside the raw strings
+// shows up as single spaces, while the loop below removes only '\t'. Confirm
+// that the real file indents these lines with tabs.
+func TestGenericTrieBuilding(t *testing.T) {
+ testCases := []struct{ in, out string }{
+ {"abc;abdef;abdefgh;xx;xy;z", `-
+ a-
+ .b-
+ ..c+
+ ..d-
+ ...ef+
+ .....gh+
+ x-
+ .x+
+ .y+
+ z+
+ `},
+ {"abracadabra;abracadabrakazam;abraham;abrasion", `-
+ a-
+ .bra-
+ ....c-
+ .....adabra+
+ ...........kazam+
+ ....h-
+ .....am+
+ ....s-
+ .....ion+
+ `},
+ {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
+ X+
+ Y+
+ a+
+ .a+
+ ..a+
+ i+
+ l-
+ .ong+
+ ....er+
+ ......st+
+ x+
+ .x+
+ `},
+ {"foo;;foo;foo1", `+
+ f-
+ .oo+
+ ...1+
+ `},
+ }
+
+ for _, tc := range testCases {
+ keys := Split(tc.in, ";")
+ // Only the old strings (even indexes) are filled in; every
+ // replacement value is left as the empty string.
+ args := make([]string, len(keys)*2)
+ for i, key := range keys {
+ args[i*2] = key
+ }
+
+ got := NewReplacer(args...).PrintTrie()
+ // Remove tabs from tc.out
+ wantbuf := make([]byte, 0, len(tc.out))
+ for i := 0; i < len(tc.out); i++ {
+ if tc.out[i] != '\t' {
+ wantbuf = append(wantbuf, tc.out[i])
+ }
+ }
+ want := string(wantbuf)
+
+ if got != want {
+ t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
+ }
+ }
+}
+
+// BenchmarkGenericNoMatch times Replace on a 200-byte input made of 'A's and
+// 'B's, none of which is an old string of the replacer ("a", "b", "12").
+// The input and the replacer are built before the timed loop.
+func BenchmarkGenericNoMatch(b *testing.B) {
+ str := Repeat("A", 100) + Repeat("B", 100)
+ generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
+ for i := 0; i < b.N; i++ {
+ generic.Replace(str)
+ }
+}
+
+// BenchmarkGenericMatch1 times Replace with the same pairs as
+// BenchmarkGenericNoMatch, but on 100 'a's followed by 100 'b's, so every
+// input byte is an old string.
+func BenchmarkGenericMatch1(b *testing.B) {
+ str := Repeat("a", 100) + Repeat("b", 100)
+ generic := NewReplacer("a", "A", "b", "B", "12", "123")
+ for i := 0; i < b.N; i++ {
+ generic.Replace(str)
+ }
+}
+
+// BenchmarkGenericMatch2 times htmlUnescaper.Replace on 100 repetitions of a
+// short snippet that mixes plain text with "&apos;", "&lt;" and "&gt;"
+// entities.
+func BenchmarkGenericMatch2(b *testing.B) {
+ str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
+ for i := 0; i < b.N; i++ {
+ htmlUnescaper.Replace(str)
+ }
+}
+
+// benchmarkSingleString is the shared body of the BenchmarkSingle*
+// benchmarks. It builds a replacer with the single pair pattern -> "[match]",
+// records len(text) as the bytes processed per iteration, and resets the
+// timer so that building the replacer is excluded from the measurement.
+func benchmarkSingleString(b *testing.B, pattern, text string) {
+ r := NewReplacer(pattern, "[match]")
+ b.SetBytes(int64(len(text)))
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ r.Replace(text)
+ }
+}
+
+// BenchmarkSingleMaxSkipping searches 10000 'a's for a pattern of 25 'b's;
+// no byte of the text occurs anywhere in the pattern.
+// NOTE(review): presumably the best case for the searcher's skipping — confirm.
+func BenchmarkSingleMaxSkipping(b *testing.B) {
+ benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
+}
+
+// BenchmarkSingleLongSuffixFail searches 1002 'a's for a pattern consisting
+// of one 'b' followed by 500 'a's. The text contains no 'b', so the pattern
+// never matches even though its 500-byte suffix matches everywhere.
+func BenchmarkSingleLongSuffixFail(b *testing.B) {
+ benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
+}
+
+// BenchmarkSingleMatch replaces "abcdef" in 1000 repetitions of
+// "abcdefghijklmno", so the pattern occurs once per repetition.
+func BenchmarkSingleMatch(b *testing.B) {
+ benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
+}
+
+// BenchmarkByteByteNoMatch times capitalLetters.Replace on an input of
+// upper-case 'A's and 'B's, which contains neither "a" nor "b".
+func BenchmarkByteByteNoMatch(b *testing.B) {
+ str := Repeat("A", 100) + Repeat("B", 100)
+ for i := 0; i < b.N; i++ {
+ capitalLetters.Replace(str)
+ }
+}
+
+// BenchmarkByteByteMatch times capitalLetters.Replace on 100 'a's followed
+// by 100 'b's, so every input byte is replaced.
+func BenchmarkByteByteMatch(b *testing.B) {
+ str := Repeat("a", 100) + Repeat("b", 100)
+ for i := 0; i < b.N; i++ {
+ capitalLetters.Replace(str)
+ }
+}
+
+// BenchmarkByteStringMatch times htmlEscaper.Replace on a 200-byte input in
+// which only the first byte ("<") and the last byte (">") are old strings.
+func BenchmarkByteStringMatch(b *testing.B) {
+ str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.Replace(str)
+ }
+}
+
+// BenchmarkHTMLEscapeNew times escaping a short sentence with htmlEscaper.
+// BenchmarkHTMLEscapeOld runs the same input through oldHTMLEscape.
+func BenchmarkHTMLEscapeNew(b *testing.B) {
+ str := "I <3 to escape HTML & other text too."
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.Replace(str)
+ }
+}
+
+// BenchmarkHTMLEscapeOld times escaping a short sentence with oldHTMLEscape,
+// the chained-Replace implementation. BenchmarkHTMLEscapeNew runs the same
+// input through htmlEscaper.
+func BenchmarkHTMLEscapeOld(b *testing.B) {
+ str := "I <3 to escape HTML & other text too."
+ for i := 0; i < b.N; i++ {
+ oldHTMLEscape(str)
+ }
+}
+
+// BenchmarkByteStringReplacerWriteString times htmlEscaper.WriteString into
+// a single bytes.Buffer that is Reset after every iteration. The values
+// returned by WriteString are not checked.
+func BenchmarkByteStringReplacerWriteString(b *testing.B) {
+ str := Repeat("I <3 to escape HTML & other text too.", 100)
+ buf := new(bytes.Buffer)
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.WriteString(buf, str)
+ buf.Reset()
+ }
+}
+
+// BenchmarkByteReplacerWriteString times capitalLetters.WriteString on 100
+// repetitions of the lower-case alphabet, writing into a single bytes.Buffer
+// that is Reset after every iteration. The values returned by WriteString
+// are not checked.
+func BenchmarkByteReplacerWriteString(b *testing.B) {
+ str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
+ buf := new(bytes.Buffer)
+ for i := 0; i < b.N; i++ {
+ capitalLetters.WriteString(buf, str)
+ buf.Reset()
+ }
+}
+
+// BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
+// It performs the a->A, b->B mapping with two chained Replace calls on the
+// same input that BenchmarkByteByteMatch uses.
+func BenchmarkByteByteReplaces(b *testing.B) {
+ str := Repeat("a", 100) + Repeat("b", 100)
+ for i := 0; i < b.N; i++ {
+ Replace(Replace(str, "a", "A", -1), "b", "B", -1)
+ }
+}
+
+// BenchmarkByteByteMap compares byteByteImpl against Map.
+// It performs the a->A, b->B mapping with a rune-mapping function on the
+// same input that BenchmarkByteByteMatch uses.
+func BenchmarkByteByteMap(b *testing.B) {
+ str := Repeat("a", 100) + Repeat("b", 100)
+ // fn upper-cases 'a' and 'b' and returns every other rune unchanged.
+ fn := func(r rune) rune {
+ switch r {
+ case 'a':
+ return 'A'
+ case 'b':
+ return 'B'
+ }
+ return r
+ }
+ for i := 0; i < b.N; i++ {
+ Map(fn, str)
+ }
+}
+
+// mapdata holds the named inputs used by BenchmarkMap: the lower-case Latin
+// alphabet and the lower-case Greek alphabet, each with its letters
+// separated by spaces.
+var mapdata = []struct{ name, data string }{
+ {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
+ {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
+}
+
+// BenchmarkMap times Map over every entry of mapdata with two mapping
+// functions, reported as the sub-benchmarks "identity/<name>" and
+// "change/<name>".
+func BenchmarkMap(b *testing.B) {
+ // mapidentity returns every rune unchanged.
+ mapidentity := func(r rune) rune {
+ return r
+ }
+
+ b.Run("identity", func(b *testing.B) {
+ for _, md := range mapdata {
+ b.Run(md.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Map(mapidentity, md.data)
+ }
+ })
+ }
+ })
+
+ // mapchange shifts runes in 'a'..'z' and 'α'..'ω' by the distance
+ // between the first lower-case and first upper-case letter of that
+ // alphabet, and leaves all other runes (such as the spaces) unchanged.
+ mapchange := func(r rune) rune {
+ if 'a' <= r && r <= 'z' {
+ return r + 'A' - 'a'
+ }
+ if 'α' <= r && r <= 'ω' {
+ return r + 'Α' - 'α'
+ }
+ return r
+ }
+
+ b.Run("change", func(b *testing.B) {
+ for _, md := range mapdata {
+ b.Run(md.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Map(mapchange, md.data)
+ }
+ })
+ }
+ })
+}