summary refs log tree commit diff stats
path: root/src/strings
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:19:13 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:19:13 +0000
commit ccd992355df7192993c666236047820244914598 (patch)
tree f00fea65147227b7743083c6148396f74cd66935 /src/strings
parent Initial commit. (diff)
download golang-1.21-ccd992355df7192993c666236047820244914598.tar.xz
golang-1.21-ccd992355df7192993c666236047820244914598.zip
Adding upstream version 1.21.8. upstream/1.21.8
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/strings')
-rw-r--r-- src/strings/builder.go 118
-rw-r--r-- src/strings/builder_test.go 371
-rw-r--r-- src/strings/clone.go 28
-rw-r--r-- src/strings/clone_test.go 45
-rw-r--r-- src/strings/compare.go 28
-rw-r--r-- src/strings/compare_test.go 119
-rw-r--r-- src/strings/example_test.go 449
-rw-r--r-- src/strings/export_test.go 47
-rw-r--r-- src/strings/reader.go 160
-rw-r--r-- src/strings/reader_test.go 233
-rw-r--r-- src/strings/replace.go 578
-rw-r--r-- src/strings/replace_test.go 583
-rw-r--r-- src/strings/search.go 124
-rw-r--r-- src/strings/search_test.go 90
-rw-r--r-- src/strings/strings.go 1305
-rw-r--r-- src/strings/strings_test.go 2053
16 files changed, 6331 insertions, 0 deletions
diff --git a/src/strings/builder.go b/src/strings/builder.go
new file mode 100644
index 0000000..299ad51
--- /dev/null
+++ b/src/strings/builder.go
@@ -0,0 +1,118 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+import (
+ "internal/bytealg"
+ "unicode/utf8"
+ "unsafe"
+)
+
+// A Builder is used to efficiently build a string using Write methods.
+// It minimizes memory copying. The zero value is ready to use.
+// Do not copy a non-zero Builder: Grow and the Write methods panic when called on such a copy.
+type Builder struct {
+ addr *Builder // address of the receiver, recorded by copyCheck to detect copies by value
+ buf []byte // bytes accumulated so far; String returns a string built over this slice
+}
+
+// noescape hides a pointer from escape analysis. It is the identity function
+// but escape analysis doesn't think the output depends on the input.
+// noescape is inlined and currently compiles down to zero instructions.
+// USE CAREFULLY!
+// This was copied from the runtime; see issues 23382 and 7921.
+//
+//go:nosplit
+//go:nocheckptr
+func noescape(p unsafe.Pointer) unsafe.Pointer {
+ x := uintptr(p) // go through an integer so the result is not a direct use of p
+ return unsafe.Pointer(x ^ 0) // x ^ 0 == x: the same address is returned unchanged
+}
+
+func (b *Builder) copyCheck() { // copyCheck panics if b is a by-value copy of a Builder whose address was already recorded
+ if b.addr == nil { // first checked use: record the receiver's own address
+ // This hack works around a failing of Go's escape analysis
+ // that was causing b to escape and be heap allocated.
+ // See issue 23382.
+ // TODO: once issue 7921 is fixed, this should be reverted to
+ // just "b.addr = b".
+ b.addr = (*Builder)(noescape(unsafe.Pointer(b)))
+ } else if b.addr != b { // recorded address is some other Builder, so this value was copied
+ panic("strings: illegal use of non-zero Builder copied by value")
+ }
+}
+
+// String returns the accumulated string. It performs no copy check and builds the result directly over b.buf.
+func (b *Builder) String() string {
+ return unsafe.String(unsafe.SliceData(b.buf), len(b.buf)) // string header over the buffer's data pointer; no bytes are copied
+}
+
+// Len returns the number of accumulated bytes; b.Len() == len(b.String()).
+func (b *Builder) Len() int { return len(b.buf) } // does not call copyCheck
+
+// Cap returns the capacity of the builder's underlying byte slice. It is the
+// total space allocated for the string being built and includes any bytes
+// already written.
+func (b *Builder) Cap() int { return cap(b.buf) } // does not call copyCheck
+
+// Reset resets the Builder to be empty. The buffer is dropped, not truncated, so no capacity is kept.
+func (b *Builder) Reset() {
+ b.addr = nil // forget the recorded address; the next copyCheck records it afresh
+ b.buf = nil // release the slice instead of reusing it, leaving earlier String results untouched
+}
+
+// grow copies the buffer to a new, larger buffer so that there are at least n
+// bytes of capacity beyond len(b.buf). The new buffer is sized 2*cap(b.buf) + n.
+func (b *Builder) grow(n int) {
+ buf := bytealg.MakeNoZero(2*cap(b.buf) + n)[:len(b.buf)] // new slice, resliced to the current length (presumably not zero-filled, per the helper's name)
+ copy(buf, b.buf) // carry the existing contents over
+ b.buf = buf
+}
+
+// Grow grows b's capacity, if necessary, to guarantee space for
+// another n bytes. After Grow(n), at least n bytes can be written to b
+// without another allocation. If n is negative, Grow panics.
+func (b *Builder) Grow(n int) {
+ b.copyCheck() // runs before the argument check, so a copied Builder panics here first
+ if n < 0 {
+ panic("strings.Builder.Grow: negative count")
+ }
+ if cap(b.buf)-len(b.buf) < n { // reallocate only when the spare capacity is too small
+ b.grow(n)
+ }
+}
+
+// Write appends the contents of p to b's buffer.
+// Write always returns len(p), nil. It panics if b is a copied non-zero Builder.
+func (b *Builder) Write(p []byte) (int, error) {
+ b.copyCheck()
+ b.buf = append(b.buf, p...) // append grows the buffer as needed
+ return len(p), nil
+}
+
+// WriteByte appends the byte c to b's buffer.
+// The returned error is always nil. It panics if b is a copied non-zero Builder.
+func (b *Builder) WriteByte(c byte) error {
+ b.copyCheck()
+ b.buf = append(b.buf, c)
+ return nil
+}
+
+// WriteRune appends the UTF-8 encoding of Unicode code point r to b's buffer.
+// It returns the number of bytes appended for r and a nil error.
+func (b *Builder) WriteRune(r rune) (int, error) {
+ b.copyCheck()
+ n := len(b.buf) // length before the append, used to compute how many bytes were added
+ b.buf = utf8.AppendRune(b.buf, r)
+ return len(b.buf) - n, nil
+}
+
+// WriteString appends the contents of s to b's buffer.
+// It returns the length of s and a nil error. It panics if b is a copied non-zero Builder.
+func (b *Builder) WriteString(s string) (int, error) {
+ b.copyCheck()
+ b.buf = append(b.buf, s...) // append accepts a string source directly; no []byte conversion is written
+ return len(s), nil
+}
diff --git a/src/strings/builder_test.go b/src/strings/builder_test.go
new file mode 100644
index 0000000..dbc2c19
--- /dev/null
+++ b/src/strings/builder_test.go
@@ -0,0 +1,371 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "bytes"
+ . "strings"
+ "testing"
+ "unicode/utf8"
+)
+
+func check(t *testing.T, b *Builder, want string) { // check verifies b.String() == want and that Len and Cap are consistent with it
+ t.Helper() // report failures at the caller's line
+ got := b.String()
+ if got != want {
+ t.Errorf("String: got %#q; want %#q", got, want)
+ return // Len/Cap checks are skipped once the content is already wrong
+ }
+ if n := b.Len(); n != len(got) {
+ t.Errorf("Len: got %d; but len(String()) is %d", n, len(got))
+ }
+ if n := b.Cap(); n < len(got) { // capacity may exceed the length but must never be below it
+ t.Errorf("Cap: got %d; but len(String()) is %d", n, len(got))
+ }
+}
+
+func TestBuilder(t *testing.T) {
+ var b Builder
+ check(t, &b, "")
+ n, err := b.WriteString("hello")
+ if err != nil || n != 5 {
+ t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
+ }
+ check(t, &b, "hello")
+ if err = b.WriteByte(' '); err != nil {
+ t.Errorf("WriteByte: %s", err)
+ }
+ check(t, &b, "hello ")
+ n, err = b.WriteString("world")
+ if err != nil || n != 5 {
+ t.Errorf("WriteString: got %d,%s; want 5,nil", n, err)
+ }
+ check(t, &b, "hello world")
+}
+
+func TestBuilderString(t *testing.T) { // strings returned by String must stay intact while the Builder keeps growing
+ var b Builder
+ b.WriteString("alpha")
+ check(t, &b, "alpha")
+ s1 := b.String() // snapshot after the first write
+ b.WriteString("beta")
+ check(t, &b, "alphabeta")
+ s2 := b.String() // snapshot after the second write
+ b.WriteString("gamma")
+ check(t, &b, "alphabetagamma")
+ s3 := b.String() // snapshot after the third write
+
+ // Check that subsequent operations didn't change the returned strings.
+ if want := "alpha"; s1 != want {
+ t.Errorf("first String result is now %q; want %q", s1, want)
+ }
+ if want := "alphabeta"; s2 != want {
+ t.Errorf("second String result is now %q; want %q", s2, want)
+ }
+ if want := "alphabetagamma"; s3 != want {
+ t.Errorf("third String result is now %q; want %q", s3, want)
+ }
+}
+
+func TestBuilderReset(t *testing.T) { // Reset must empty the Builder without disturbing strings handed out earlier
+ var b Builder
+ check(t, &b, "")
+ b.WriteString("aaa")
+ s := b.String() // taken before Reset; compared again at the end
+ check(t, &b, "aaa")
+ b.Reset()
+ check(t, &b, "")
+
+ // Ensure that writing after Reset doesn't alter
+ // previously returned strings.
+ b.WriteString("bbb")
+ check(t, &b, "bbb")
+ if want := "aaa"; s != want {
+ t.Errorf("previous String result changed after Reset: got %q; want %q", s, want)
+ }
+}
+
+func TestBuilderGrow(t *testing.T) { // Grow(n) must reserve n bytes in a single allocation and panic for negative n
+ for _, growLen := range []int{0, 100, 1000, 10000, 100000} {
+ p := bytes.Repeat([]byte{'a'}, growLen) // payload exactly as long as the reserved space
+ allocs := testing.AllocsPerRun(100, func() {
+ var b Builder
+ b.Grow(growLen) // should be only alloc, when growLen > 0
+ if b.Cap() < growLen {
+ t.Fatalf("growLen=%d: Cap() is lower than growLen", growLen)
+ }
+ b.Write(p)
+ if b.String() != string(p) {
+ t.Fatalf("growLen=%d: bad data written after Grow", growLen)
+ }
+ })
+ wantAllocs := 1
+ if growLen == 0 { // growing by zero on an empty Builder must not allocate at all
+ wantAllocs = 0
+ }
+ if g, w := int(allocs), wantAllocs; g != w {
+ t.Errorf("growLen=%d: got %d allocs during Write; want %v", growLen, g, w)
+ }
+ }
+ // when growLen < 0, should panic
+ var a Builder
+ n := -1
+ defer func() { // registered before the call so the expected panic is recovered here
+ if r := recover(); r == nil {
+ t.Errorf("a.Grow(%d) should panic()", n)
+ }
+ }()
+ a.Grow(n)
+}
+
+func TestBuilderWrite2(t *testing.T) {
+ const s0 = "hello 世界"
+ for _, tt := range []struct {
+ name string
+ fn func(b *Builder) (int, error)
+ n int
+ want string
+ }{
+ {
+ "Write",
+ func(b *Builder) (int, error) { return b.Write([]byte(s0)) },
+ len(s0),
+ s0,
+ },
+ {
+ "WriteRune",
+ func(b *Builder) (int, error) { return b.WriteRune('a') },
+ 1,
+ "a",
+ },
+ {
+ "WriteRuneWide",
+ func(b *Builder) (int, error) { return b.WriteRune('世') },
+ 3,
+ "世",
+ },
+ {
+ "WriteString",
+ func(b *Builder) (int, error) { return b.WriteString(s0) },
+ len(s0),
+ s0,
+ },
+ } {
+ t.Run(tt.name, func(t *testing.T) {
+ var b Builder
+ n, err := tt.fn(&b)
+ if err != nil {
+ t.Fatalf("first call: got %s", err)
+ }
+ if n != tt.n {
+ t.Errorf("first call: got n=%d; want %d", n, tt.n)
+ }
+ check(t, &b, tt.want)
+
+ n, err = tt.fn(&b)
+ if err != nil {
+ t.Fatalf("second call: got %s", err)
+ }
+ if n != tt.n {
+ t.Errorf("second call: got n=%d; want %d", n, tt.n)
+ }
+ check(t, &b, tt.want+tt.want)
+ })
+ }
+}
+
+func TestBuilderWriteByte(t *testing.T) {
+ var b Builder
+ if err := b.WriteByte('a'); err != nil {
+ t.Error(err)
+ }
+ if err := b.WriteByte(0); err != nil {
+ t.Error(err)
+ }
+ check(t, &b, "a\x00")
+}
+
+func TestBuilderAllocs(t *testing.T) {
+ // Issue 23382; verify that copyCheck doesn't force the
+ // Builder to escape and be heap allocated.
+ n := testing.AllocsPerRun(10000, func() {
+ var b Builder
+ b.Grow(5) // the single expected allocation: the 5-byte buffer
+ b.WriteString("abcde") // fits in the reserved space
+ _ = b.String()
+ })
+ if n != 1 { // any extra allocation would mean something besides the buffer reached the heap
+ t.Errorf("Builder allocs = %v; want 1", n)
+ }
+}
+
+func TestBuilderCopyPanic(t *testing.T) {
+ tests := []struct {
+ name string
+ fn func()
+ wantPanic bool
+ }{
+ {
+ name: "String",
+ wantPanic: false,
+ fn: func() {
+ var a Builder
+ a.WriteByte('x')
+ b := a
+ _ = b.String() // appease vet
+ },
+ },
+ {
+ name: "Len",
+ wantPanic: false,
+ fn: func() {
+ var a Builder
+ a.WriteByte('x')
+ b := a
+ b.Len()
+ },
+ },
+ {
+ name: "Cap",
+ wantPanic: false,
+ fn: func() {
+ var a Builder
+ a.WriteByte('x')
+ b := a
+ b.Cap()
+ },
+ },
+ {
+ name: "Reset",
+ wantPanic: false,
+ fn: func() {
+ var a Builder
+ a.WriteByte('x')
+ b := a
+ b.Reset()
+ b.WriteByte('y')
+ },
+ },
+ {
+ name: "Write",
+ wantPanic: true,
+ fn: func() {
+ var a Builder
+ a.Write([]byte("x"))
+ b := a
+ b.Write([]byte("y"))
+ },
+ },
+ {
+ name: "WriteByte",
+ wantPanic: true,
+ fn: func() {
+ var a Builder
+ a.WriteByte('x')
+ b := a
+ b.WriteByte('y')
+ },
+ },
+ {
+ name: "WriteString",
+ wantPanic: true,
+ fn: func() {
+ var a Builder
+ a.WriteString("x")
+ b := a
+ b.WriteString("y")
+ },
+ },
+ {
+ name: "WriteRune",
+ wantPanic: true,
+ fn: func() {
+ var a Builder
+ a.WriteRune('x')
+ b := a
+ b.WriteRune('y')
+ },
+ },
+ {
+ name: "Grow",
+ wantPanic: true,
+ fn: func() {
+ var a Builder
+ a.Grow(1)
+ b := a
+ b.Grow(2)
+ },
+ },
+ }
+ for _, tt := range tests {
+ didPanic := make(chan bool)
+ go func() {
+ defer func() { didPanic <- recover() != nil }()
+ tt.fn()
+ }()
+ if got := <-didPanic; got != tt.wantPanic {
+ t.Errorf("%s: panicked = %v; want %v", tt.name, got, tt.wantPanic)
+ }
+ }
+}
+
+func TestBuilderWriteInvalidRune(t *testing.T) {
+ // Invalid runes, including negative ones, should be written as
+ // utf8.RuneError.
+ for _, r := range []rune{-1, utf8.MaxRune + 1} { // one value below and one above the valid range
+ var b Builder
+ b.WriteRune(r)
+ check(t, &b, "\uFFFD")
+ }
+}
+
+var someBytes = []byte("some bytes sdljlk jsklj3lkjlk djlkjw")
+
+var sinkS string
+
+func benchmarkBuilder(b *testing.B, f func(b *testing.B, numWrite int, grow bool)) { // runs f as three sub-benchmarks varying write count and pre-growing
+ b.Run("1Write_NoGrow", func(b *testing.B) {
+ b.ReportAllocs()
+ f(b, 1, false) // one write, no Grow up front
+ })
+ b.Run("3Write_NoGrow", func(b *testing.B) {
+ b.ReportAllocs()
+ f(b, 3, false) // three writes, no Grow up front
+ })
+ b.Run("3Write_Grow", func(b *testing.B) {
+ b.ReportAllocs()
+ f(b, 3, true) // three writes with grow requested
+ })
+}
+
+func BenchmarkBuildString_Builder(b *testing.B) {
+ benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
+ for i := 0; i < b.N; i++ {
+ var buf Builder
+ if grow {
+ buf.Grow(len(someBytes) * numWrite)
+ }
+ for i := 0; i < numWrite; i++ {
+ buf.Write(someBytes)
+ }
+ sinkS = buf.String()
+ }
+ })
+}
+
+func BenchmarkBuildString_ByteBuffer(b *testing.B) {
+ benchmarkBuilder(b, func(b *testing.B, numWrite int, grow bool) {
+ for i := 0; i < b.N; i++ {
+ var buf bytes.Buffer
+ if grow {
+ buf.Grow(len(someBytes) * numWrite)
+ }
+ for i := 0; i < numWrite; i++ {
+ buf.Write(someBytes)
+ }
+ sinkS = buf.String()
+ }
+ })
+}
diff --git a/src/strings/clone.go b/src/strings/clone.go
new file mode 100644
index 0000000..d14df11
--- /dev/null
+++ b/src/strings/clone.go
@@ -0,0 +1,28 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+import (
+ "unsafe"
+)
+
+// Clone returns a fresh copy of s.
+// It guarantees to make a copy of s into a new allocation,
+// which can be important when retaining only a small substring
+// of a much larger string. Using Clone can help such programs
+// use less memory. Of course, since using Clone makes a copy,
+// overuse of Clone can make programs use more memory.
+// Clone should typically be used only rarely, and only when
+// profiling indicates that it is needed.
+// For strings of length zero the string "" will be returned
+// and no allocation is made.
+func Clone(s string) string {
+ if len(s) == 0 { // empty input: return the constant "" without allocating
+ return ""
+ }
+ b := make([]byte, len(s)) // the new allocation that will back the result
+ copy(b, s)
+ return unsafe.String(&b[0], len(b)) // b is non-empty here, so &b[0] is valid; b is not used afterwards
+}
diff --git a/src/strings/clone_test.go b/src/strings/clone_test.go
new file mode 100644
index 0000000..64f2760
--- /dev/null
+++ b/src/strings/clone_test.go
@@ -0,0 +1,45 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "strings"
+ "testing"
+ "unsafe"
+)
+
+var emptyString string
+
+func TestClone(t *testing.T) { // Clone must return equal content in separate memory, except for empty strings
+ var cloneTests = []string{
+ "",
+ strings.Clone(""),
+ strings.Repeat("a", 42)[:0], // zero-length slice of a non-empty string
+ "short",
+ strings.Repeat("a", 42),
+ }
+ for _, input := range cloneTests {
+ clone := strings.Clone(input)
+ if clone != input {
+ t.Errorf("Clone(%q) = %q; want %q", input, clone, input)
+ }
+
+ if len(input) != 0 && unsafe.StringData(clone) == unsafe.StringData(input) { // same data pointer means no copy was made
+ t.Errorf("Clone(%q) return value should not reference inputs backing memory.", input)
+ }
+
+ if len(input) == 0 && unsafe.StringData(clone) != unsafe.StringData(emptyString) { // empty clones must share the data pointer of the zero-value string
+ t.Errorf("Clone(%#v) return value should be equal to empty string.", unsafe.StringData(input))
+ }
+ }
+}
+
+func BenchmarkClone(b *testing.B) {
+ var str = strings.Repeat("a", 42)
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ stringSink = strings.Clone(str)
+ }
+}
diff --git a/src/strings/compare.go b/src/strings/compare.go
new file mode 100644
index 0000000..2bd4a24
--- /dev/null
+++ b/src/strings/compare.go
@@ -0,0 +1,28 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+// Compare returns an integer comparing two strings lexicographically.
+// The result will be 0 if a == b, -1 if a < b, and +1 if a > b.
+//
+// Compare is included only for symmetry with package bytes.
+// It is usually clearer and always faster to use the built-in
+// string comparison operators ==, <, >, and so on.
+func Compare(a, b string) int {
+ // NOTE(rsc): This function does NOT call the runtime cmpstring function,
+ // because we do not want to provide any performance justification for
+ // using strings.Compare. Basically no one should use strings.Compare.
+ // As the comment above says, it is here only for symmetry with package bytes.
+ // If performance is important, the compiler should be changed to recognize
+ // the pattern so that all code doing three-way comparisons, not just code
+ // using strings.Compare, can benefit.
+ if a == b {
+ return 0
+ }
+ if a < b {
+ return -1
+ }
+ return +1 // neither equal nor less, so a > b
+}
diff --git a/src/strings/compare_test.go b/src/strings/compare_test.go
new file mode 100644
index 0000000..a435784
--- /dev/null
+++ b/src/strings/compare_test.go
@@ -0,0 +1,119 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+// Derived from bytes/compare_test.go.
+// Benchmarks omitted since the underlying implementation is identical.
+
+import (
+ "internal/testenv"
+ . "strings"
+ "testing"
+ "unsafe"
+)
+
+var compareTests = []struct {
+ a, b string
+ i int
+}{
+ {"", "", 0},
+ {"a", "", 1},
+ {"", "a", -1},
+ {"abc", "abc", 0},
+ {"ab", "abc", -1},
+ {"abc", "ab", 1},
+ {"x", "ab", 1},
+ {"ab", "x", -1},
+ {"x", "a", 1},
+ {"b", "x", -1},
+ // test runtime·memeq's chunked implementation
+ {"abcdefgh", "abcdefgh", 0},
+ {"abcdefghi", "abcdefghi", 0},
+ {"abcdefghi", "abcdefghj", -1},
+}
+
+func TestCompare(t *testing.T) { // table-driven check of Compare against compareTests
+ for _, tt := range compareTests {
+ cmp := Compare(tt.a, tt.b)
+ if cmp != tt.i { // tt.i is the expected result for the pair (tt.a, tt.b)
+ t.Errorf(`Compare(%q, %q) = %v`, tt.a, tt.b, cmp)
+ }
+ }
+}
+
+func TestCompareIdenticalString(t *testing.T) { // compares a string with itself and with its own prefix
+ var s = "Hello Gophers!"
+ if Compare(s, s) != 0 {
+ t.Error("s != s")
+ }
+ if Compare(s, s[:1]) != 1 { // a string is greater than its proper prefix
+ t.Error("s > s[:1] failed")
+ }
+}
+
+func TestCompareStrings(t *testing.T) { // exercises Compare over many lengths, with a single differing byte at each position
+ // unsafeString converts a []byte to a string with no allocation.
+ // The caller must not modify b while the result string is in use.
+ unsafeString := func(b []byte) string {
+ return unsafe.String(unsafe.SliceData(b), len(b))
+ }
+
+ lengths := make([]int, 0) // lengths to test in ascending order
+ for i := 0; i <= 128; i++ {
+ lengths = append(lengths, i)
+ }
+ lengths = append(lengths, 256, 512, 1024, 1333, 4095, 4096, 4097)
+
+ if !testing.Short() || testenv.Builder() != "" { // the largest sizes run outside -short mode or when testenv.Builder() is non-empty
+ lengths = append(lengths, 65535, 65536, 65537, 99999)
+ }
+
+ n := lengths[len(lengths)-1] // largest length; the buffers get one extra byte beyond it
+ a := make([]byte, n+1)
+ b := make([]byte, n+1)
+ lastLen := 0
+ for _, len := range lengths { // NOTE: len shadows the builtin inside this loop body
+ // randomish but deterministic data. No 0 or 255.
+ for i := 0; i < len; i++ {
+ a[i] = byte(1 + 31*i%254)
+ b[i] = byte(1 + 31*i%254)
+ }
+ // data past the end is different
+ for i := len; i <= n; i++ {
+ a[i] = 8
+ b[i] = 9
+ }
+
+ sa, sb := unsafeString(a), unsafeString(b)
+ cmp := Compare(sa[:len], sb[:len]) // equal prefixes must compare equal despite differing tails
+ if cmp != 0 {
+ t.Errorf(`CompareIdentical(%d) = %d`, len, cmp)
+ }
+ if len > 0 {
+ cmp = Compare(sa[:len-1], sb[:len]) // a is a proper prefix of b
+ if cmp != -1 {
+ t.Errorf(`CompareAshorter(%d) = %d`, len, cmp)
+ }
+ cmp = Compare(sa[:len], sb[:len-1]) // b is a proper prefix of a
+ if cmp != 1 {
+ t.Errorf(`CompareBshorter(%d) = %d`, len, cmp)
+ }
+ }
+ for k := lastLen; k < len; k++ { // only positions not covered by the previous, shorter length
+ b[k] = a[k] - 1 // make b smaller at byte k
+ cmp = Compare(unsafeString(a[:len]), unsafeString(b[:len]))
+ if cmp != 1 {
+ t.Errorf(`CompareAbigger(%d,%d) = %d`, len, k, cmp)
+ }
+ b[k] = a[k] + 1 // make b larger at byte k
+ cmp = Compare(unsafeString(a[:len]), unsafeString(b[:len]))
+ if cmp != -1 {
+ t.Errorf(`CompareBbigger(%d,%d) = %d`, len, k, cmp)
+ }
+ b[k] = a[k] // restore equality before moving on
+ }
+ lastLen = len
+ }
+}
diff --git a/src/strings/example_test.go b/src/strings/example_test.go
new file mode 100644
index 0000000..ab83e10
--- /dev/null
+++ b/src/strings/example_test.go
@@ -0,0 +1,449 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "fmt"
+ "strings"
+ "unicode"
+ "unsafe"
+)
+
+func ExampleClone() {
+ s := "abc"
+ clone := strings.Clone(s)
+ fmt.Println(s == clone) // same contents
+ fmt.Println(unsafe.StringData(s) == unsafe.StringData(clone)) // different data pointers: the clone has its own memory
+ // Output:
+ // true
+ // false
+}
+
+func ExampleBuilder() {
+ var b strings.Builder // zero value, used without initialization
+ for i := 3; i >= 1; i-- {
+ fmt.Fprintf(&b, "%d...", i) // &b is the destination writer, so formatted output is appended to b
+ }
+ b.WriteString("ignition")
+ fmt.Println(b.String())
+
+ // Output: 3...2...1...ignition
+}
+
+func ExampleCompare() {
+ fmt.Println(strings.Compare("a", "b"))
+ fmt.Println(strings.Compare("a", "a"))
+ fmt.Println(strings.Compare("b", "a"))
+ // Output:
+ // -1
+ // 0
+ // 1
+}
+
+func ExampleContains() {
+ fmt.Println(strings.Contains("seafood", "foo"))
+ fmt.Println(strings.Contains("seafood", "bar"))
+ fmt.Println(strings.Contains("seafood", ""))
+ fmt.Println(strings.Contains("", ""))
+ // Output:
+ // true
+ // false
+ // true
+ // true
+}
+
+func ExampleContainsAny() {
+ fmt.Println(strings.ContainsAny("team", "i"))
+ fmt.Println(strings.ContainsAny("fail", "ui"))
+ fmt.Println(strings.ContainsAny("ure", "ui"))
+ fmt.Println(strings.ContainsAny("failure", "ui"))
+ fmt.Println(strings.ContainsAny("foo", ""))
+ fmt.Println(strings.ContainsAny("", ""))
+ // Output:
+ // false
+ // true
+ // true
+ // true
+ // false
+ // false
+}
+
+func ExampleContainsRune() {
+ // Finds whether a string contains a particular Unicode code point.
+ // The code point for the lowercase letter "a", for example, is 97.
+ fmt.Println(strings.ContainsRune("aardvark", 97))
+ fmt.Println(strings.ContainsRune("timeout", 97))
+ // Output:
+ // true
+ // false
+}
+
+func ExampleCount() {
+ fmt.Println(strings.Count("cheese", "e")) // three occurrences of "e"
+ fmt.Println(strings.Count("five", "")) // before & after each rune
+ // Output:
+ // 3
+ // 5
+}
+
+func ExampleCut() {
+ show := func(s, sep string) {
+ before, after, found := strings.Cut(s, sep)
+ fmt.Printf("Cut(%q, %q) = %q, %q, %v\n", s, sep, before, after, found)
+ }
+ show("Gopher", "Go")
+ show("Gopher", "ph")
+ show("Gopher", "er")
+ show("Gopher", "Badger")
+ // Output:
+ // Cut("Gopher", "Go") = "", "pher", true
+ // Cut("Gopher", "ph") = "Go", "er", true
+ // Cut("Gopher", "er") = "Goph", "", true
+ // Cut("Gopher", "Badger") = "Gopher", "", false
+}
+
+func ExampleCutPrefix() {
+ show := func(s, sep string) {
+ after, found := strings.CutPrefix(s, sep)
+ fmt.Printf("CutPrefix(%q, %q) = %q, %v\n", s, sep, after, found)
+ }
+ show("Gopher", "Go")
+ show("Gopher", "ph")
+ // Output:
+ // CutPrefix("Gopher", "Go") = "pher", true
+ // CutPrefix("Gopher", "ph") = "Gopher", false
+}
+
+func ExampleCutSuffix() {
+ show := func(s, sep string) {
+ before, found := strings.CutSuffix(s, sep)
+ fmt.Printf("CutSuffix(%q, %q) = %q, %v\n", s, sep, before, found)
+ }
+ show("Gopher", "Go")
+ show("Gopher", "er")
+ // Output:
+ // CutSuffix("Gopher", "Go") = "Gopher", false
+ // CutSuffix("Gopher", "er") = "Goph", true
+}
+
+func ExampleEqualFold() {
+ fmt.Println(strings.EqualFold("Go", "go"))
+ fmt.Println(strings.EqualFold("AB", "ab")) // true because comparison uses simple case-folding
+ fmt.Println(strings.EqualFold("ß", "ss")) // false because comparison does not use full case-folding
+ // Output:
+ // true
+ // true
+ // false
+}
+
+func ExampleFields() {
+ fmt.Printf("Fields are: %q", strings.Fields(" foo bar baz "))
+ // Output: Fields are: ["foo" "bar" "baz"]
+}
+
+func ExampleFieldsFunc() {
+ f := func(c rune) bool {
+ return !unicode.IsLetter(c) && !unicode.IsNumber(c)
+ }
+ fmt.Printf("Fields are: %q", strings.FieldsFunc(" foo1;bar2,baz3...", f))
+ // Output: Fields are: ["foo1" "bar2" "baz3"]
+}
+
+func ExampleHasPrefix() {
+ fmt.Println(strings.HasPrefix("Gopher", "Go"))
+ fmt.Println(strings.HasPrefix("Gopher", "C"))
+ fmt.Println(strings.HasPrefix("Gopher", ""))
+ // Output:
+ // true
+ // false
+ // true
+}
+
+func ExampleHasSuffix() {
+ fmt.Println(strings.HasSuffix("Amigo", "go"))
+ fmt.Println(strings.HasSuffix("Amigo", "O"))
+ fmt.Println(strings.HasSuffix("Amigo", "Ami"))
+ fmt.Println(strings.HasSuffix("Amigo", ""))
+ // Output:
+ // true
+ // false
+ // false
+ // true
+}
+
+func ExampleIndex() {
+ fmt.Println(strings.Index("chicken", "ken"))
+ fmt.Println(strings.Index("chicken", "dmr"))
+ // Output:
+ // 4
+ // -1
+}
+
+func ExampleIndexFunc() {
+ f := func(c rune) bool {
+ return unicode.Is(unicode.Han, c)
+ }
+ fmt.Println(strings.IndexFunc("Hello, 世界", f))
+ fmt.Println(strings.IndexFunc("Hello, world", f))
+ // Output:
+ // 7
+ // -1
+}
+
+func ExampleIndexAny() {
+ fmt.Println(strings.IndexAny("chicken", "aeiouy"))
+ fmt.Println(strings.IndexAny("crwth", "aeiouy"))
+ // Output:
+ // 2
+ // -1
+}
+
+func ExampleIndexByte() {
+ fmt.Println(strings.IndexByte("golang", 'g'))
+ fmt.Println(strings.IndexByte("gophers", 'h'))
+ fmt.Println(strings.IndexByte("golang", 'x'))
+ // Output:
+ // 0
+ // 3
+ // -1
+}
+func ExampleIndexRune() {
+ fmt.Println(strings.IndexRune("chicken", 'k'))
+ fmt.Println(strings.IndexRune("chicken", 'd'))
+ // Output:
+ // 4
+ // -1
+}
+
+func ExampleLastIndex() {
+ fmt.Println(strings.Index("go gopher", "go"))
+ fmt.Println(strings.LastIndex("go gopher", "go"))
+ fmt.Println(strings.LastIndex("go gopher", "rodent"))
+ // Output:
+ // 0
+ // 3
+ // -1
+}
+
+func ExampleLastIndexAny() {
+ fmt.Println(strings.LastIndexAny("go gopher", "go"))
+ fmt.Println(strings.LastIndexAny("go gopher", "rodent"))
+ fmt.Println(strings.LastIndexAny("go gopher", "fail"))
+ // Output:
+ // 4
+ // 8
+ // -1
+}
+
+func ExampleLastIndexByte() {
+ fmt.Println(strings.LastIndexByte("Hello, world", 'l'))
+ fmt.Println(strings.LastIndexByte("Hello, world", 'o'))
+ fmt.Println(strings.LastIndexByte("Hello, world", 'x'))
+ // Output:
+ // 10
+ // 8
+ // -1
+}
+
+func ExampleLastIndexFunc() {
+ fmt.Println(strings.LastIndexFunc("go 123", unicode.IsNumber))
+ fmt.Println(strings.LastIndexFunc("123 go", unicode.IsNumber))
+ fmt.Println(strings.LastIndexFunc("go", unicode.IsNumber))
+ // Output:
+ // 5
+ // 2
+ // -1
+}
+
+func ExampleJoin() {
+ s := []string{"foo", "bar", "baz"}
+ fmt.Println(strings.Join(s, ", "))
+ // Output: foo, bar, baz
+}
+
+func ExampleRepeat() {
+ fmt.Println("ba" + strings.Repeat("na", 2))
+ // Output: banana
+}
+
+func ExampleReplace() {
+ fmt.Println(strings.Replace("oink oink oink", "k", "ky", 2))
+ fmt.Println(strings.Replace("oink oink oink", "oink", "moo", -1))
+ // Output:
+ // oinky oinky oink
+ // moo moo moo
+}
+
+func ExampleReplaceAll() {
+ fmt.Println(strings.ReplaceAll("oink oink oink", "oink", "moo"))
+ // Output:
+ // moo moo moo
+}
+
+func ExampleSplit() {
+ fmt.Printf("%q\n", strings.Split("a,b,c", ","))
+ fmt.Printf("%q\n", strings.Split("a man a plan a canal panama", "a "))
+ fmt.Printf("%q\n", strings.Split(" xyz ", ""))
+ fmt.Printf("%q\n", strings.Split("", "Bernardo O'Higgins"))
+ // Output:
+ // ["a" "b" "c"]
+ // ["" "man " "plan " "canal panama"]
+ // [" " "x" "y" "z" " "]
+ // [""]
+}
+
+func ExampleSplitN() {
+ fmt.Printf("%q\n", strings.SplitN("a,b,c", ",", 2)) // at most 2 pieces; the last keeps the unsplit remainder
+ z := strings.SplitN("a,b,c", ",", 0) // a count of 0 yields a nil slice, as the output below shows
+ fmt.Printf("%q (nil = %v)\n", z, z == nil)
+ // Output:
+ // ["a" "b,c"]
+ // [] (nil = true)
+}
+
+func ExampleSplitAfter() {
+ fmt.Printf("%q\n", strings.SplitAfter("a,b,c", ","))
+ // Output: ["a," "b," "c"]
+}
+
+func ExampleSplitAfterN() {
+ fmt.Printf("%q\n", strings.SplitAfterN("a,b,c", ",", 2))
+ // Output: ["a," "b,c"]
+}
+
+func ExampleTitle() {
+ // Compare this example to the ToTitle example.
+ fmt.Println(strings.Title("her royal highness"))
+ fmt.Println(strings.Title("loud noises"))
+ fmt.Println(strings.Title("хлеб"))
+ // Output:
+ // Her Royal Highness
+ // Loud Noises
+ // Хлеб
+}
+
+func ExampleToTitle() {
+ // Compare this example to the Title example.
+ fmt.Println(strings.ToTitle("her royal highness"))
+ fmt.Println(strings.ToTitle("loud noises"))
+ fmt.Println(strings.ToTitle("хлеб"))
+ // Output:
+ // HER ROYAL HIGHNESS
+ // LOUD NOISES
+ // ХЛЕБ
+}
+
+func ExampleToTitleSpecial() {
+ fmt.Println(strings.ToTitleSpecial(unicode.TurkishCase, "dünyanın ilk borsa yapısı Aizonai kabul edilir"))
+ // Output:
+ // DÜNYANIN İLK BORSA YAPISI AİZONAİ KABUL EDİLİR
+}
+
+func ExampleMap() {
+ rot13 := func(r rune) rune { // rotates ASCII letters by 13 places, leaving every other rune unchanged
+ switch {
+ case r >= 'A' && r <= 'Z':
+ return 'A' + (r-'A'+13)%26 // wrap around within the uppercase range
+ case r >= 'a' && r <= 'z':
+ return 'a' + (r-'a'+13)%26 // wrap around within the lowercase range
+ }
+ return r
+ }
+ fmt.Println(strings.Map(rot13, "'Twas brillig and the slithy gopher..."))
+ // Output: 'Gjnf oevyyvt naq gur fyvgul tbcure...
+}
+
+func ExampleNewReplacer() {
+ r := strings.NewReplacer("<", "&lt;", ">", "&gt;")
+ fmt.Println(r.Replace("This is <b>HTML</b>!"))
+ // Output: This is &lt;b&gt;HTML&lt;/b&gt;!
+}
+
+func ExampleToUpper() {
+ fmt.Println(strings.ToUpper("Gopher"))
+ // Output: GOPHER
+}
+
+func ExampleToUpperSpecial() {
+ fmt.Println(strings.ToUpperSpecial(unicode.TurkishCase, "örnek iş"))
+ // Output: ÖRNEK İŞ
+}
+
+func ExampleToLower() {
+ fmt.Println(strings.ToLower("Gopher"))
+ // Output: gopher
+}
+
+func ExampleToLowerSpecial() {
+ fmt.Println(strings.ToLowerSpecial(unicode.TurkishCase, "Önnek İş"))
+ // Output: önnek iş
+}
+
+func ExampleTrim() {
+ fmt.Print(strings.Trim("¡¡¡Hello, Gophers!!!", "!¡"))
+ // Output: Hello, Gophers
+}
+
+func ExampleTrimSpace() {
+ fmt.Println(strings.TrimSpace(" \t\n Hello, Gophers \n\t\r\n"))
+ // Output: Hello, Gophers
+}
+
+func ExampleTrimPrefix() {
+ var s = "¡¡¡Hello, Gophers!!!"
+ s = strings.TrimPrefix(s, "¡¡¡Hello, ")
+ s = strings.TrimPrefix(s, "¡¡¡Howdy, ")
+ fmt.Print(s)
+ // Output: Gophers!!!
+}
+
+func ExampleTrimSuffix() {
+ var s = "¡¡¡Hello, Gophers!!!"
+ s = strings.TrimSuffix(s, ", Gophers!!!")
+ s = strings.TrimSuffix(s, ", Marmots!!!")
+ fmt.Print(s)
+ // Output: ¡¡¡Hello
+}
+
+func ExampleTrimFunc() {
+ fmt.Print(strings.TrimFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
+ return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+ }))
+ // Output: Hello, Gophers
+}
+
+func ExampleTrimLeft() {
+ fmt.Print(strings.TrimLeft("¡¡¡Hello, Gophers!!!", "!¡"))
+ // Output: Hello, Gophers!!!
+}
+
+func ExampleTrimLeftFunc() {
+ fmt.Print(strings.TrimLeftFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
+ return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+ }))
+ // Output: Hello, Gophers!!!
+}
+
+func ExampleTrimRight() {
+ fmt.Print(strings.TrimRight("¡¡¡Hello, Gophers!!!", "!¡"))
+ // Output: ¡¡¡Hello, Gophers
+}
+
+func ExampleTrimRightFunc() {
+ fmt.Print(strings.TrimRightFunc("¡¡¡Hello, Gophers!!!", func(r rune) bool {
+ return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+ }))
+ // Output: ¡¡¡Hello, Gophers
+}
+
+func ExampleToValidUTF8() {
+ fmt.Printf("%s\n", strings.ToValidUTF8("abc", "\uFFFD")) // printed as-is: the replacement does not appear in the output
+ fmt.Printf("%s\n", strings.ToValidUTF8("a\xffb\xC0\xAFc\xff", "")) // empty replacement: only "abc" remains
+ fmt.Printf("%s\n", strings.ToValidUTF8("\xed\xa0\x80", "abc")) // the three bytes yield a single "abc" in the output
+ // Output:
+ // abc
+ // abc
+ // abc
+}
diff --git a/src/strings/export_test.go b/src/strings/export_test.go
new file mode 100644
index 0000000..81d1cab
--- /dev/null
+++ b/src/strings/export_test.go
@@ -0,0 +1,47 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+func (r *Replacer) Replacer() any { // test-only accessor: returns r.r after making sure it has been built
+ r.once.Do(r.buildOnce) // build on first use
+ return r.r
+}
+
+func (r *Replacer) PrintTrie() string { // test-only: renders the generic replacer's trie as text
+ r.once.Do(r.buildOnce)
+ gen := r.r.(*genericReplacer) // unchecked assertion: panics if r.r is not a *genericReplacer
+ return gen.printNode(&gen.root, 0)
+}
+
+func (r *genericReplacer) printNode(t *trieNode, depth int) (s string) { // recursively formats node t and its descendants; depth sets the number of leading dots
+ if t.priority > 0 { // "+" for a node with positive priority, "-" otherwise
+ s += "+"
+ } else {
+ s += "-"
+ }
+ s += "\n"
+
+ if t.prefix != "" { // node with a prefix: print it, then recurse into t.next
+ s += Repeat(".", depth) + t.prefix
+ s += r.printNode(t.next, depth+len(t.prefix))
+ } else if t.table != nil { // node with a lookup table: walk every byte value through r.mapping
+ for b, m := range r.mapping {
+ if int(m) != r.tableSize && t.table[m] != nil { // skip bytes mapped to r.tableSize and empty table slots
+ s += Repeat(".", depth) + string([]byte{byte(b)})
+ s += r.printNode(t.table[m], depth+1)
+ }
+ }
+ }
+ return
+}
+
+func StringFind(pattern, text string) int { // test-only wrapper: builds a finder for pattern and returns the result of next(text)
+ return makeStringFinder(pattern).next(text)
+}
+
+func DumpTables(pattern string) ([]int, []int) { // test-only: exposes the finder's badCharSkip and goodSuffixSkip tables for pattern
+ finder := makeStringFinder(pattern)
+ return finder.badCharSkip[:], finder.goodSuffixSkip // [:] returns badCharSkip as a slice
+}
diff --git a/src/strings/reader.go b/src/strings/reader.go
new file mode 100644
index 0000000..04f31a1
--- /dev/null
+++ b/src/strings/reader.go
@@ -0,0 +1,160 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+import (
+ "errors"
+ "io"
+ "unicode/utf8"
+)
+
+// A Reader implements the io.Reader, io.ReaderAt, io.ByteReader, io.ByteScanner,
+// io.RuneReader, io.RuneScanner, io.Seeker, and io.WriterTo interfaces by reading
+// from a string.
+// The zero value for Reader operates like a Reader of an empty string.
+type Reader struct {
+ // s is the string being read; the methods only slice and index it.
+ s string
+ // i is the current reading index into s. Seek may set it beyond len(s),
+ // in which case reads report io.EOF.
+ i int64 // current reading index
+ // prevRune is the index at which the most recent ReadRune started, or a
+ // negative value when the last operation was not a successful ReadRune.
+ prevRune int // index of previous rune; or < 0
+}
+
+// Len reports how many bytes of the string have not been read yet.
+// It is 0 once the reading index is at or past the end of the string.
+func (r *Reader) Len() int {
+	size := int64(len(r.s))
+	if r.i < size {
+		return int(size - r.i)
+	}
+	return 0
+}
+
+// Size returns the length in bytes of the underlying string, which is the
+// number of bytes available for reading via ReadAt.
+// The result does not change when other methods move the reading index.
+func (r *Reader) Size() int64 {
+	return int64(len(r.s))
+}
+
+// Read implements the io.Reader interface.
+// It copies unread bytes into b and advances the reading index by the
+// number copied; at or past the end of the string it returns 0, io.EOF.
+func (r *Reader) Read(b []byte) (n int, err error) {
+	if int64(len(r.s)) <= r.i {
+		return 0, io.EOF
+	}
+	// A plain read invalidates any pending UnreadRune.
+	r.prevRune = -1
+	n = copy(b, r.s[r.i:])
+	r.i += int64(n)
+	return n, nil
+}
+
+// ReadAt implements the io.ReaderAt interface.
+// It copies bytes starting at off into b without touching the Reader's
+// state, and returns io.EOF whenever fewer than len(b) bytes are copied.
+func (r *Reader) ReadAt(b []byte, off int64) (n int, err error) {
+	// cannot modify state - see io.ReaderAt
+	switch {
+	case off < 0:
+		return 0, errors.New("strings.Reader.ReadAt: negative offset")
+	case off >= int64(len(r.s)):
+		return 0, io.EOF
+	}
+	n = copy(b, r.s[off:])
+	if n < len(b) {
+		return n, io.EOF
+	}
+	return n, nil
+}
+
+// ReadByte implements the io.ByteReader interface.
+// It always clears the UnreadRune state, even when it returns io.EOF.
+func (r *Reader) ReadByte() (byte, error) {
+	r.prevRune = -1
+	if r.i < int64(len(r.s)) {
+		c := r.s[r.i]
+		r.i++
+		return c, nil
+	}
+	return 0, io.EOF
+}
+
+// UnreadByte implements the io.ByteScanner interface.
+// It steps the reading index back one byte, and fails when the index is
+// already at the beginning of the string.
+func (r *Reader) UnreadByte() error {
+	if r.i > 0 {
+		r.prevRune = -1
+		r.i--
+		return nil
+	}
+	return errors.New("strings.Reader.UnreadByte: at beginning of string")
+}
+
+// ReadRune implements the io.RuneReader interface.
+// It records where the rune started so that UnreadRune can undo the read.
+func (r *Reader) ReadRune() (ch rune, size int, err error) {
+	if r.i >= int64(len(r.s)) {
+		r.prevRune = -1
+		return 0, 0, io.EOF
+	}
+	r.prevRune = int(r.i)
+	first := r.s[r.i]
+	if first >= utf8.RuneSelf {
+		// Multi-byte (or invalid) sequence: take the general decoder.
+		ch, size = utf8.DecodeRuneInString(r.s[r.i:])
+		r.i += int64(size)
+		return ch, size, nil
+	}
+	// Single-byte rune.
+	r.i++
+	return rune(first), 1, nil
+}
+
+// UnreadRune implements the io.RuneScanner interface.
+// It moves the reading index back to where the last ReadRune started, and
+// fails unless the previous operation was a successful ReadRune.
+func (r *Reader) UnreadRune() error {
+	switch {
+	case r.i <= 0:
+		return errors.New("strings.Reader.UnreadRune: at beginning of string")
+	case r.prevRune < 0:
+		return errors.New("strings.Reader.UnreadRune: previous operation was not ReadRune")
+	}
+	r.i, r.prevRune = int64(r.prevRune), -1
+	return nil
+}
+
+// Seek implements the io.Seeker interface.
+// The new position may lie beyond the end of the string; only a negative
+// position or an unknown whence is rejected. The UnreadRune state is
+// cleared in every case.
+func (r *Reader) Seek(offset int64, whence int) (int64, error) {
+	r.prevRune = -1
+	var base int64
+	switch whence {
+	case io.SeekStart:
+		base = 0
+	case io.SeekCurrent:
+		base = r.i
+	case io.SeekEnd:
+		base = int64(len(r.s))
+	default:
+		return 0, errors.New("strings.Reader.Seek: invalid whence")
+	}
+	pos := base + offset
+	if pos < 0 {
+		return 0, errors.New("strings.Reader.Seek: negative position")
+	}
+	r.i = pos
+	return pos, nil
+}
+
+// WriteTo implements the io.WriterTo interface.
+// It writes the unread part of the string to w, advances the reading index
+// by the number of bytes written, and reports io.ErrShortWrite when w
+// accepted fewer bytes without returning an error.
+func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
+	r.prevRune = -1
+	if r.i >= int64(len(r.s)) {
+		return 0, nil
+	}
+	rest := r.s[r.i:]
+	written, err := io.WriteString(w, rest)
+	if written > len(rest) {
+		panic("strings.Reader.WriteTo: invalid WriteString count")
+	}
+	r.i += int64(written)
+	if err == nil && written != len(rest) {
+		err = io.ErrShortWrite
+	}
+	return int64(written), err
+}
+
+// Reset makes r read from the start of s and clears its UnreadRune state.
+func (r *Reader) Reset(s string) {
+	*r = Reader{s: s, i: 0, prevRune: -1}
+}
+
+// NewReader returns a new Reader reading from s.
+// It is similar to bytes.NewBufferString but more efficient and non-writable.
+func NewReader(s string) *Reader {
+	return &Reader{s: s, i: 0, prevRune: -1}
+}
diff --git a/src/strings/reader_test.go b/src/strings/reader_test.go
new file mode 100644
index 0000000..dc99f9c
--- /dev/null
+++ b/src/strings/reader_test.go
@@ -0,0 +1,233 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "strings"
+ "sync"
+ "testing"
+)
+
+// TestReader drives one shared Reader through a sequence of Seek+Read steps.
+// The rows are order-dependent: io.SeekCurrent rows rely on the position
+// left behind by the previous row.
+func TestReader(t *testing.T) {
+ r := strings.NewReader("0123456789")
+ tests := []struct {
+ off int64
+ seek int
+ n int
+ want string
+ wantpos int64
+ readerr error
+ seekerr string
+ }{
+ {seek: io.SeekStart, off: 0, n: 20, want: "0123456789"},
+ {seek: io.SeekStart, off: 1, n: 1, want: "1"},
+ {seek: io.SeekCurrent, off: 1, wantpos: 3, n: 2, want: "34"},
+ {seek: io.SeekStart, off: -1, seekerr: "strings.Reader.Seek: negative position"},
+ {seek: io.SeekStart, off: 1 << 33, wantpos: 1 << 33, readerr: io.EOF},
+ {seek: io.SeekCurrent, off: 1, wantpos: 1<<33 + 1, readerr: io.EOF},
+ {seek: io.SeekStart, n: 5, want: "01234"},
+ {seek: io.SeekCurrent, n: 5, want: "56789"},
+ {seek: io.SeekEnd, off: -1, n: 1, wantpos: 9, want: "9"},
+ }
+
+ for i, tt := range tests {
+ pos, err := r.Seek(tt.off, tt.seek)
+ // A row that expects a seek error stops here, pass or fail.
+ if err == nil && tt.seekerr != "" {
+ t.Errorf("%d. want seek error %q", i, tt.seekerr)
+ continue
+ }
+ if err != nil && err.Error() != tt.seekerr {
+ t.Errorf("%d. seek error = %q; want %q", i, err.Error(), tt.seekerr)
+ continue
+ }
+ // wantpos is only checked when the row sets it to a non-zero value.
+ if tt.wantpos != 0 && tt.wantpos != pos {
+ t.Errorf("%d. pos = %d, want %d", i, pos, tt.wantpos)
+ }
+ buf := make([]byte, tt.n)
+ n, err := r.Read(buf)
+ if err != tt.readerr {
+ t.Errorf("%d. read = %v; want %v", i, err, tt.readerr)
+ continue
+ }
+ got := string(buf[:n])
+ if got != tt.want {
+ t.Errorf("%d. got %q; want %q", i, got, tt.want)
+ }
+ }
+}
+
+// TestReadAfterBigSeek checks that Read reports EOF after seeking far past
+// the end of the string.
+func TestReadAfterBigSeek(t *testing.T) {
+	r := strings.NewReader("0123456789")
+	_, err := r.Seek(1<<31+5, io.SeekStart)
+	if err != nil {
+		t.Fatal(err)
+	}
+	n, err := r.Read(make([]byte, 10))
+	if !(n == 0 && err == io.EOF) {
+		t.Errorf("Read = %d, %v; want 0, EOF", n, err)
+	}
+}
+
+// TestReaderAt checks the data and error returned by ReadAt for offsets
+// inside, at the edge of, and outside the string.
+func TestReaderAt(t *testing.T) {
+	r := strings.NewReader("0123456789")
+	cases := []struct {
+		off     int64
+		n       int
+		want    string
+		wanterr any
+	}{
+		{off: 0, n: 10, want: "0123456789", wanterr: nil},
+		{off: 1, n: 10, want: "123456789", wanterr: io.EOF},
+		{off: 1, n: 9, want: "123456789", wanterr: nil},
+		{off: 11, n: 10, want: "", wanterr: io.EOF},
+		{off: 0, n: 0, want: "", wanterr: nil},
+		{off: -1, n: 0, want: "", wanterr: "strings.Reader.ReadAt: negative offset"},
+	}
+	for i, tc := range cases {
+		buf := make([]byte, tc.n)
+		read, err := r.ReadAt(buf, tc.off)
+		if got := string(buf[:read]); got != tc.want {
+			t.Errorf("%d. got %q; want %q", i, got, tc.want)
+		}
+		// Errors are compared by their printed form because wanterr is
+		// either an error value or a plain message string.
+		gotErr, wantErr := fmt.Sprintf("%v", err), fmt.Sprintf("%v", tc.wanterr)
+		if gotErr != wantErr {
+			t.Errorf("%d. got error = %v; want %v", i, err, tc.wanterr)
+		}
+	}
+}
+
+func TestReaderAtConcurrent(t *testing.T) {
+	// Test for the race detector, to verify ReadAt doesn't mutate
+	// any state.
+	r := strings.NewReader("0123456789")
+	const readers = 5
+	var wg sync.WaitGroup
+	wg.Add(readers)
+	for i := 0; i < readers; i++ {
+		go func(off int64) {
+			defer wg.Done()
+			one := make([]byte, 1)
+			r.ReadAt(one, off)
+		}(int64(i))
+	}
+	wg.Wait()
+}
+
+func TestEmptyReaderConcurrent(t *testing.T) {
+	// Test for the race detector, to verify a Read that doesn't yield any bytes
+	// is okay to use from multiple goroutines. This was our historic behavior.
+	// See golang.org/issue/7856
+	r := strings.NewReader("")
+	var wg sync.WaitGroup
+	readInto := func(p []byte) {
+		defer wg.Done()
+		r.Read(p)
+	}
+	for i := 0; i < 5; i++ {
+		wg.Add(2)
+		go readInto(make([]byte, 1))
+		go readInto(nil)
+	}
+	wg.Wait()
+}
+
+// TestWriteTo checks that WriteTo copies every suffix of a sample string in
+// full and leaves the Reader drained.
+func TestWriteTo(t *testing.T) {
+	const str = "0123456789"
+	for start := 0; start <= len(str); start++ {
+		want := str[start:]
+		r := strings.NewReader(want)
+		var sink bytes.Buffer
+		n, err := r.WriteTo(&sink)
+		if expect := int64(len(want)); n != expect {
+			t.Errorf("got %v; want %v", n, expect)
+		}
+		if err != nil {
+			t.Errorf("for length %d: got error = %v; want nil", len(want), err)
+		}
+		if got := sink.String(); got != want {
+			t.Errorf("got string %q; want %q", got, want)
+		}
+		if left := r.Len(); left != 0 {
+			t.Errorf("reader contains %v bytes; want 0", left)
+		}
+	}
+}
+
+// TestReaderLenSize checks that Len is affected by reads, but Size is not.
+func TestReaderLenSize(t *testing.T) {
+	r := strings.NewReader("abc")
+	// Consume exactly one byte. If the copy fails, the Len check below
+	// would report a misleading failure, so stop right away.
+	if _, err := io.CopyN(io.Discard, r, 1); err != nil {
+		t.Fatalf("CopyN: unexpected error: %v", err)
+	}
+	if r.Len() != 2 {
+		t.Errorf("Len = %d; want 2", r.Len())
+	}
+	if r.Size() != 3 {
+		t.Errorf("Size = %d; want 3", r.Size())
+	}
+}
+
+// TestReaderReset checks that Reset discards the UnreadRune state and makes
+// the Reader serve the new string from its start.
+func TestReaderReset(t *testing.T) {
+	r := strings.NewReader("世界")
+	_, _, err := r.ReadRune()
+	if err != nil {
+		t.Errorf("ReadRune: unexpected error: %v", err)
+	}
+
+	const want = "abcdef"
+	r.Reset(want)
+	if r.UnreadRune() == nil {
+		t.Errorf("UnreadRune: expected error, got nil")
+	}
+	data, err := io.ReadAll(r)
+	if err != nil {
+		t.Errorf("ReadAll: unexpected error: %v", err)
+	}
+	if got := string(data); got != want {
+		t.Errorf("ReadAll: got %q, want %q", got, want)
+	}
+}
+
+// TestReaderZero checks every method on a zero-value Reader, which must
+// behave like a Reader of an empty string.
+func TestReaderZero(t *testing.T) {
+	// Each check gets its own fresh zero Reader.
+	zero := func() *strings.Reader { return new(strings.Reader) }
+
+	if l := zero().Len(); l != 0 {
+		t.Errorf("Len: got %d, want 0", l)
+	}
+
+	if n, err := zero().Read(nil); !(n == 0 && err == io.EOF) {
+		t.Errorf("Read: got %d, %v; want 0, io.EOF", n, err)
+	}
+
+	if n, err := zero().ReadAt(nil, 11); !(n == 0 && err == io.EOF) {
+		t.Errorf("ReadAt: got %d, %v; want 0, io.EOF", n, err)
+	}
+
+	if b, err := zero().ReadByte(); !(b == 0 && err == io.EOF) {
+		t.Errorf("ReadByte: got %d, %v; want 0, io.EOF", b, err)
+	}
+
+	if ch, size, err := zero().ReadRune(); !(ch == 0 && size == 0 && err == io.EOF) {
+		t.Errorf("ReadRune: got %d, %d, %v; want 0, 0, io.EOF", ch, size, err)
+	}
+
+	if offset, err := zero().Seek(11, io.SeekStart); !(offset == 11 && err == nil) {
+		t.Errorf("Seek: got %d, %v; want 11, nil", offset, err)
+	}
+
+	if s := zero().Size(); s != 0 {
+		t.Errorf("Size: got %d, want 0", s)
+	}
+
+	if err := zero().UnreadByte(); err == nil {
+		t.Errorf("UnreadByte: got nil, want error")
+	}
+
+	if err := zero().UnreadRune(); err == nil {
+		t.Errorf("UnreadRune: got nil, want error")
+	}
+
+	if n, err := zero().WriteTo(io.Discard); !(n == 0 && err == nil) {
+		t.Errorf("WriteTo: got %d, %v; want 0, nil", n, err)
+	}
+}
diff --git a/src/strings/replace.go b/src/strings/replace.go
new file mode 100644
index 0000000..f504fb4
--- /dev/null
+++ b/src/strings/replace.go
@@ -0,0 +1,578 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+import (
+ "io"
+ "sync"
+)
+
+// Replacer replaces a list of strings with replacements.
+// It is safe for concurrent use by multiple goroutines.
+type Replacer struct {
+ once sync.Once // guards buildOnce method
+ // r is the replacement algorithm; it is assigned by buildOnce, which
+ // Replace and WriteString run through once before using it.
+ r replacer
+ // oldnew holds NewReplacer's copy of the old/new pairs until buildOnce
+ // has built r and sets this field to nil.
+ oldnew []string
+}
+
+// replacer is the interface that a replacement algorithm needs to implement.
+type replacer interface {
+ // Replace returns s with the algorithm's replacements applied.
+ Replace(s string) string
+ // WriteString writes s, with replacements applied, to w.
+ WriteString(w io.Writer, s string) (n int, err error)
+}
+
+// NewReplacer returns a new Replacer from a list of old, new string
+// pairs. Replacements are performed in the order they appear in the
+// target string, without overlapping matches. The old string
+// comparisons are done in argument order.
+//
+// NewReplacer panics if given an odd number of arguments.
+func NewReplacer(oldnew ...string) *Replacer {
+	if len(oldnew)%2 == 1 {
+		panic("strings.NewReplacer: odd argument count")
+	}
+	// Keep a private copy so the caller may reuse its slice afterwards.
+	pairs := append([]string(nil), oldnew...)
+	return &Replacer{oldnew: pairs}
+}
+
+// buildOnce builds the replacement algorithm and then drops the argument
+// list, which is no longer needed. It is run through r.once.
+func (r *Replacer) buildOnce() {
+	impl := r.build()
+	r.r, r.oldnew = impl, nil
+}
+
+// build picks the replacement algorithm for the stored old/new pairs:
+//   - a single pair whose old string is longer than one byte uses the
+//     single-string replacer;
+//   - any old string that is not exactly one byte long selects the
+//     generic replacer;
+//   - one-byte olds with one-byte news use a byte table;
+//   - one-byte olds with any other news use the byte-to-string replacer.
+func (r *Replacer) build() replacer {
+	pairs := r.oldnew
+	if len(pairs) == 2 && len(pairs[0]) > 1 {
+		return makeSingleStringReplacer(pairs[0], pairs[1])
+	}
+
+	singleByteNew := true
+	for i := 0; i < len(pairs); i += 2 {
+		if len(pairs[i]) != 1 {
+			return makeGenericReplacer(pairs)
+		}
+		if len(pairs[i+1]) != 1 {
+			singleByteNew = false
+		}
+	}
+
+	if singleByteNew {
+		table := new(byteReplacer)
+		// Start from the identity mapping.
+		for i := range table {
+			table[i] = byte(i)
+		}
+		// The first occurrence of old->new map takes precedence
+		// over the others with the same old string, so walk the pairs
+		// backwards and let earlier pairs overwrite later ones.
+		for i := len(pairs) - 2; i >= 0; i -= 2 {
+			table[pairs[i][0]] = pairs[i+1][0]
+		}
+		return table
+	}
+
+	bs := &byteStringReplacer{toReplace: make([]string, 0, len(pairs)/2)}
+	// The first occurrence of old->new map takes precedence
+	// over the others with the same old string.
+	for i := len(pairs) - 2; i >= 0; i -= 2 {
+		old, repl := pairs[i][0], pairs[i+1]
+		// To avoid counting repetitions multiple times.
+		if bs.replacements[old] == nil {
+			// We need to use string([]byte{old}) instead of string(old),
+			// to avoid utf8 encoding of old.
+			// E.g. byte(150) produces string of length 2.
+			bs.toReplace = append(bs.toReplace, string([]byte{old}))
+		}
+		bs.replacements[old] = []byte(repl)
+	}
+	return bs
+}
+
+// Replace returns a copy of s with all replacements performed.
+// The replacement algorithm is built on first use.
+func (r *Replacer) Replace(s string) string {
+	r.once.Do(r.buildOnce)
+	impl := r.r
+	return impl.Replace(s)
+}
+
+// WriteString writes s to w with all replacements performed.
+// The replacement algorithm is built on first use.
+func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
+	r.once.Do(r.buildOnce)
+	impl := r.r
+	return impl.WriteString(w, s)
+}
+
+// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
+// and values may be empty. For example, the trie containing keys "ax", "ay",
+// "bcbc", "x" and "xy" could have eight nodes:
+//
+// n0 -
+// n1 a-
+// n2 .x+
+// n3 .y+
+// n4 b-
+// n5 .cbc+
+// n6 x+
+// n7 .y+
+//
+// n0 is the root node, and its children are n1, n4 and n6; n1's children are
+// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
+// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
+// (marked with a trailing "+") are complete keys.
+type trieNode struct {
+ // value is the value of the trie node's key/value pair. It is empty if
+ // this node is not a complete key.
+ value string
+ // priority is the priority (higher is more important) of the trie node's
+ // key/value pair; keys are not necessarily matched shortest- or longest-
+ // first. Priority is positive if this node is a complete key, and zero
+ // otherwise. In the example above, positive/zero priorities are marked
+ // with a trailing "+" or "-".
+ priority int
+
+ // A trie node may have zero, one or more child nodes:
+ // * if the remaining fields are zero, there are no children.
+ // * if prefix and next are non-zero, there is one child in next.
+ // * if table is non-zero, it defines all the children.
+ //
+ // Prefixes are preferred over tables when there is one child, but the
+ // root node always uses a table for lookup efficiency.
+
+ // prefix is the difference in keys between this trie node and the next.
+ // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
+ // Node n5 has no children and so has zero prefix, next and table fields.
+ prefix string
+ // next is the single child reached after consuming prefix.
+ next *trieNode
+
+ // table is a lookup table indexed by the next byte in the key, after
+ // remapping that byte through genericReplacer.mapping to create a dense
+ // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
+ // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
+ // genericReplacer.tableSize will be 5. Node n0's table will be
+ // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
+ // 'a', 'b' and 'x'.
+ table []*trieNode
+}
+
+// add inserts key with value val and the given priority into the subtrie
+// rooted at t. key is relative to t: bytes already consumed on the path to t
+// have been stripped. r supplies the byte mapping and table size used when a
+// lookup table has to be created.
+func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
+ if key == "" {
+ // t is the node for the complete key. An already complete node is
+ // left alone, so the pair added first wins.
+ if t.priority == 0 {
+ t.value = val
+ t.priority = priority
+ }
+ return
+ }
+
+ if t.prefix != "" {
+ // Need to split the prefix among multiple nodes.
+ var n int // length of the longest common prefix
+ for ; n < len(t.prefix) && n < len(key); n++ {
+ if t.prefix[n] != key[n] {
+ break
+ }
+ }
+ if n == len(t.prefix) {
+ // key extends the whole prefix: continue in the child.
+ t.next.add(key[n:], val, priority, r)
+ } else if n == 0 {
+ // First byte differs, start a new lookup table here. Looking up
+ // what is currently t.prefix[0] will lead to prefixNode, and
+ // looking up key[0] will lead to keyNode.
+ var prefixNode *trieNode
+ if len(t.prefix) == 1 {
+ prefixNode = t.next
+ } else {
+ prefixNode = &trieNode{
+ prefix: t.prefix[1:],
+ next: t.next,
+ }
+ }
+ keyNode := new(trieNode)
+ t.table = make([]*trieNode, r.tableSize)
+ t.table[r.mapping[t.prefix[0]]] = prefixNode
+ t.table[r.mapping[key[0]]] = keyNode
+ t.prefix = ""
+ t.next = nil
+ keyNode.add(key[1:], val, priority, r)
+ } else {
+ // Insert new node after the common section of the prefix.
+ next := &trieNode{
+ prefix: t.prefix[n:],
+ next: t.next,
+ }
+ t.prefix = t.prefix[:n]
+ t.next = next
+ next.add(key[n:], val, priority, r)
+ }
+ } else if t.table != nil {
+ // Insert into existing table.
+ m := r.mapping[key[0]]
+ if t.table[m] == nil {
+ t.table[m] = new(trieNode)
+ }
+ t.table[m].add(key[1:], val, priority, r)
+ } else {
+ // t has no children yet: hang the whole remaining key off it as a
+ // prefix leading to a fresh node that holds the value.
+ t.prefix = key
+ t.next = new(trieNode)
+ t.next.add("", val, priority, r)
+ }
+}
+
+// lookup walks the trie along the leading bytes of s and returns the value
+// and key length of the highest-priority complete key seen on that path.
+// found is false when no complete key matches. When ignoreRoot is true a
+// match at the root node (the empty key) is not considered.
+func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
+ // Iterate down the trie to the end, and grab the value and keylen with
+ // the highest priority.
+ bestPriority := 0
+ node := &r.root
+ n := 0 // number of bytes of s consumed so far
+ for node != nil {
+ if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
+ bestPriority = node.priority
+ val = node.value
+ keylen = n
+ found = true
+ }
+
+ if s == "" {
+ break
+ }
+ if node.table != nil {
+ index := r.mapping[s[0]]
+ // An index equal to tableSize marks a byte that occurs in no key.
+ if int(index) == r.tableSize {
+ break
+ }
+ node = node.table[index]
+ s = s[1:]
+ n++
+ } else if node.prefix != "" && HasPrefix(s, node.prefix) {
+ n += len(node.prefix)
+ s = s[len(node.prefix):]
+ node = node.next
+ } else {
+ break
+ }
+ }
+ return
+}
+
+// genericReplacer is the fully generic algorithm.
+// It's used as a fallback when nothing faster can be used.
+type genericReplacer struct {
+ // root is the root of the key trie; makeGenericReplacer always gives it
+ // a lookup table.
+ root trieNode
+ // tableSize is the size of a trie node's lookup table. It is the number
+ // of unique key bytes.
+ tableSize int
+ // mapping maps from key bytes to a dense index for trieNode.table.
+ // Bytes that occur in no key map to byte(tableSize).
+ mapping [256]byte
+}
+
+// makeGenericReplacer builds the byte mapping and the key trie for the
+// old/new pairs in oldnew (old strings at even indexes, new strings at odd).
+func makeGenericReplacer(oldnew []string) *genericReplacer {
+ r := new(genericReplacer)
+ // Find each byte used, then assign them each an index.
+ for i := 0; i < len(oldnew); i += 2 {
+ key := oldnew[i]
+ for j := 0; j < len(key); j++ {
+ r.mapping[key[j]] = 1
+ }
+ }
+
+ // At this point mapping holds 0/1 flags, so the sum is the number of
+ // distinct key bytes.
+ for _, b := range r.mapping {
+ r.tableSize += int(b)
+ }
+
+ // Replace the flags with dense indexes: used bytes get 0, 1, 2, ... in
+ // byte order, unused bytes get byte(tableSize).
+ var index byte
+ for i, b := range r.mapping {
+ if b == 0 {
+ r.mapping[i] = byte(r.tableSize)
+ } else {
+ r.mapping[i] = index
+ index++
+ }
+ }
+ // Ensure root node uses a lookup table (for performance).
+ r.root.table = make([]*trieNode, r.tableSize)
+
+ // Earlier pairs get a higher priority (len(oldnew)-i decreases with i).
+ for i := 0; i < len(oldnew); i += 2 {
+ r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
+ }
+ return r
+}
+
+// appendSliceWriter is a byte slice that grows by appending whatever is
+// written to it.
+type appendSliceWriter []byte
+
+// Write writes to the buffer to satisfy io.Writer.
+// It never fails.
+func (w *appendSliceWriter) Write(p []byte) (int, error) {
+	grown := append(*w, p...)
+	*w = grown
+	return len(p), nil
+}
+
+// WriteString writes to the buffer without string->[]byte->string allocations.
+// It never fails.
+func (w *appendSliceWriter) WriteString(s string) (int, error) {
+	grown := append(*w, s...)
+	*w = grown
+	return len(s), nil
+}
+
+// stringWriter adapts an io.Writer without a WriteString method by
+// converting each string to a byte slice.
+type stringWriter struct {
+	w io.Writer
+}
+
+// WriteString writes s to the wrapped writer as bytes.
+func (w stringWriter) WriteString(s string) (int, error) {
+	return w.w.Write([]byte(s))
+}
+
+// getStringWriter returns w itself when it already implements
+// io.StringWriter, and a stringWriter wrapping w otherwise.
+func getStringWriter(w io.Writer) io.StringWriter {
+	if sw, ok := w.(io.StringWriter); ok {
+		return sw
+	}
+	return stringWriter{w}
+}
+
+// Replace returns s with all replacements applied, collecting the output of
+// WriteString in an in-memory buffer pre-sized to len(s).
+func (r *genericReplacer) Replace(s string) string {
+	out := make(appendSliceWriter, 0, len(s))
+	// appendSliceWriter's methods always return a nil error, so the
+	// result of WriteString is not inspected.
+	r.WriteString(&out, s)
+	return string(out)
+}
+
+// WriteString writes s to w with all replacements applied. It returns the
+// number of bytes written and the first write error, stopping at that error.
+func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+ sw := getStringWriter(w)
+ // last is the start of the input not yet written; wn is scratch for
+ // individual write counts.
+ var last, wn int
+ var prevMatchEmpty bool
+ // i runs up to and including len(s) so that an empty key can also match
+ // at the very end of the input.
+ for i := 0; i <= len(s); {
+ // Fast path: s[i] is not a prefix of any pattern.
+ if i != len(s) && r.root.priority == 0 {
+ index := int(r.mapping[s[i]])
+ if index == r.tableSize || r.root.table[index] == nil {
+ i++
+ continue
+ }
+ }
+
+ // Ignore the empty match iff the previous loop found the empty match.
+ val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
+ prevMatchEmpty = match && keylen == 0
+ if match {
+ // Flush the unmatched text before the match, then the replacement.
+ wn, err = sw.WriteString(s[last:i])
+ n += wn
+ if err != nil {
+ return
+ }
+ wn, err = sw.WriteString(val)
+ n += wn
+ if err != nil {
+ return
+ }
+ // For an empty match keylen is 0, so i does not move here; the next
+ // iteration ignores the root match and either finds a longer key
+ // or advances i by one.
+ i += keylen
+ last = i
+ continue
+ }
+ i++
+ }
+ if last != len(s) {
+ wn, err = sw.WriteString(s[last:])
+ n += wn
+ }
+ return
+}
+
+// singleStringReplacer is the implementation that's used when there is only
+// one string to replace (and that string has more than one byte).
+type singleStringReplacer struct {
+	// finder locates occurrences of the pattern.
+	finder *stringFinder
+	// value is the new string that replaces that pattern when it's found.
+	value string
+}
+
+// makeSingleStringReplacer returns a replacer that substitutes value for
+// every occurrence of pattern.
+func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
+	r := new(singleStringReplacer)
+	r.finder = makeStringFinder(pattern)
+	r.value = value
+	return r
+}
+
+// Replace returns s with every occurrence of the pattern replaced by
+// r.value. When the pattern does not occur, s itself is returned.
+func (r *singleStringReplacer) Replace(s string) string {
+	var out Builder
+	pos := 0 // start of the part of s not yet examined
+	found := false
+	for off := r.finder.next(s[pos:]); off != -1; off = r.finder.next(s[pos:]) {
+		found = true
+		out.Grow(off + len(r.value))
+		out.WriteString(s[pos : pos+off])
+		out.WriteString(r.value)
+		pos += off + len(r.finder.pattern)
+	}
+	if !found {
+		return s
+	}
+	out.WriteString(s[pos:])
+	return out.String()
+}
+
+// WriteString writes s to w with every occurrence of the pattern replaced
+// by r.value. It returns the number of bytes written and stops at the first
+// write error.
+func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+	sw := getStringWriter(w)
+	pos := 0 // start of the part of s not yet written
+	var wn int
+	for off := r.finder.next(s[pos:]); off != -1; off = r.finder.next(s[pos:]) {
+		wn, err = sw.WriteString(s[pos : pos+off])
+		n += wn
+		if err != nil {
+			return n, err
+		}
+		wn, err = sw.WriteString(r.value)
+		n += wn
+		if err != nil {
+			return n, err
+		}
+		pos += off + len(r.finder.pattern)
+	}
+	// The tail is written even when it is empty.
+	wn, err = sw.WriteString(s[pos:])
+	n += wn
+	return n, err
+}
+
+// byteReplacer is the implementation that's used when all the "old"
+// and "new" values are single ASCII bytes.
+// The array contains replacement bytes indexed by old byte.
+type byteReplacer [256]byte
+
+// Replace returns s with every byte mapped through the table. The input is
+// copied only once a byte actually changes; otherwise s is returned as is.
+func (r *byteReplacer) Replace(s string) string {
+	var out []byte // lazily allocated
+	for i := 0; i < len(s); i++ {
+		old := s[i]
+		repl := r[old]
+		if repl == old {
+			continue
+		}
+		if out == nil {
+			out = []byte(s)
+		}
+		out[i] = repl
+	}
+	if out != nil {
+		return string(out)
+	}
+	return s
+}
+
+// WriteString writes s to w with every byte mapped through the table.
+// Runs of unchanged bytes are written in one call; each changed byte is
+// written on its own. It returns the number of bytes written and stops at
+// the first write error.
+func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+	sw := getStringWriter(w)
+	start := 0 // beginning of the pending run of unchanged bytes
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if r[c] == c {
+			continue
+		}
+		if start != i {
+			wn, werr := sw.WriteString(s[start:i])
+			n += wn
+			if werr != nil {
+				return n, werr
+			}
+		}
+		start = i + 1
+		// Write the single replacement byte straight out of the table.
+		wn, werr := w.Write(r[c : int(c)+1])
+		n += wn
+		if werr != nil {
+			return n, werr
+		}
+	}
+	if start != len(s) {
+		wn, werr := sw.WriteString(s[start:])
+		n += wn
+		if werr != nil {
+			return n, werr
+		}
+	}
+	return n, nil
+}
+
+// byteStringReplacer is the implementation that's used when all the
+// "old" values are single ASCII bytes but the "new" values vary in size.
+type byteStringReplacer struct {
+ // replacements contains replacement byte slices indexed by old byte.
+ // A nil []byte means that the old byte should not be replaced.
+ replacements [256][]byte
+ // toReplace keeps a list of bytes to replace. Depending on length of toReplace
+ // and length of target string it may be faster to use Count, or a plain loop.
+ // We store single byte as a string, because Count takes a string.
+ // Each old byte appears in the list at most once.
+ toReplace []string
+}
+
+// countCutOff controls the ratio of a string length to a number of replacements
+// at which (*byteStringReplacer).Replace switches algorithms.
+// For strings with a higher ratio of length to replacements than that value,
+// we call Count for each replacement from toReplace.
+// For strings with a lower ratio we use a simple loop, because of Count overhead.
+// countCutOff is an empirically determined overhead multiplier.
+// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
+const countCutOff = 8
+
+// Replace returns s with every byte that has a replacement substituted.
+// It first computes the exact output size (and whether anything changes at
+// all), then fills a buffer of that size in a second pass. When nothing
+// changes, s itself is returned.
+func (r *byteStringReplacer) Replace(s string) string {
+	size := len(s)
+	changed := false
+	// Is it faster to use Count?
+	if len(r.toReplace)*countCutOff <= len(s) {
+		for _, old := range r.toReplace {
+			c := Count(s, old)
+			if c == 0 {
+				continue
+			}
+			// The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
+			size += c * (len(r.replacements[old[0]]) - 1)
+			changed = true
+		}
+	} else {
+		for i := 0; i < len(s); i++ {
+			if repl := r.replacements[s[i]]; repl != nil {
+				// See above for explanation of -1
+				size += len(repl) - 1
+				changed = true
+			}
+		}
+	}
+	if !changed {
+		return s
+	}
+	out := make([]byte, size)
+	pos := 0 // next write position in out
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if repl := r.replacements[c]; repl != nil {
+			pos += copy(out[pos:], repl)
+			continue
+		}
+		out[pos] = c
+		pos++
+	}
+	return string(out)
+}
+
+// WriteString writes s to w with every byte that has a replacement
+// substituted. Runs of unchanged bytes are written in one call. It returns
+// the number of bytes written and stops at the first write error.
+func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+	sw := getStringWriter(w)
+	start := 0 // beginning of the pending run of unchanged bytes
+	for i := 0; i < len(s); i++ {
+		repl := r.replacements[s[i]]
+		if repl == nil {
+			continue
+		}
+		if start != i {
+			wn, werr := sw.WriteString(s[start:i])
+			n += wn
+			if werr != nil {
+				return n, werr
+			}
+		}
+		start = i + 1
+		wn, werr := w.Write(repl)
+		n += wn
+		if werr != nil {
+			return n, werr
+		}
+	}
+	if start != len(s) {
+		var wn int
+		wn, err = sw.WriteString(s[start:])
+		n += wn
+	}
+	return n, err
+}
diff --git a/src/strings/replace_test.go b/src/strings/replace_test.go
new file mode 100644
index 0000000..34b5bad
--- /dev/null
+++ b/src/strings/replace_test.go
@@ -0,0 +1,583 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "bytes"
+ "fmt"
+ . "strings"
+ "testing"
+)
+
+var htmlEscaper = NewReplacer(
+ "&", "&amp;",
+ "<", "&lt;",
+ ">", "&gt;",
+ `"`, "&quot;",
+ "'", "&apos;",
+)
+
+var htmlUnescaper = NewReplacer(
+ "&amp;", "&",
+ "&lt;", "<",
+ "&gt;", ">",
+ "&quot;", `"`,
+ "&apos;", "'",
+)
+
+// oldHTMLEscape is the http package's old HTML escaping function: it applies
+// one full Replace pass per special character, "&" first so that the
+// entities produced by later passes are not escaped again.
+func oldHTMLEscape(s string) string {
+	passes := [...][2]string{
+		{"&", "&amp;"},
+		{"<", "&lt;"},
+		{">", "&gt;"},
+		{`"`, "&quot;"},
+		{"'", "&apos;"},
+	}
+	for _, p := range passes {
+		s = Replace(s, p[0], p[1], -1)
+	}
+	return s
+}
+
+var capitalLetters = NewReplacer("a", "A", "b", "B")
+
+// TestReplacer tests the replacer implementations.
+// Every case is run through both Replace and WriteString, and the byte count
+// reported by WriteString is checked against the length of the expected output.
+func TestReplacer(t *testing.T) {
+ type testCase struct {
+ r *Replacer
+ in, out string
+ }
+ var testCases []testCase
+
+ // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
+ str := func(b byte) string {
+ return string([]byte{b})
+ }
+ var s []string
+
+ // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
+ s = nil
+ for i := 0; i < 256; i++ {
+ s = append(s, str(byte(i)), str(byte(i+1)))
+ }
+ inc := NewReplacer(s...)
+
+ // Test cases with 1-byte old strings, 1-byte new strings.
+ testCases = append(testCases,
+ testCase{capitalLetters, "brad", "BrAd"},
+ testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
+ testCase{capitalLetters, "", ""},
+
+ testCase{inc, "brad", "csbe"},
+ testCase{inc, "\x00\xff", "\x01\x00"},
+ testCase{inc, "", ""},
+
+ testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
+ )
+
+ // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
+ s = nil
+ for i := 0; i < 256; i++ {
+ n := i + 1 - 'a'
+ if n < 1 {
+ n = 1
+ }
+ s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
+ }
+ repeat := NewReplacer(s...)
+
+ // Test cases with 1-byte old strings, variable length new strings.
+ testCases = append(testCases,
+ testCase{htmlEscaper, "No changes", "No changes"},
+ testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
+ testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
+ testCase{htmlEscaper, "", ""},
+
+ testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
+ testCase{repeat, "abba", "abbbba"},
+ testCase{repeat, "", ""},
+
+ testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
+ )
+
+ // The remaining test cases have variable length old strings.
+
+ testCases = append(testCases,
+ testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
+ testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
+ testCase{htmlUnescaper, "", ""},
+
+ testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
+
+ testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
+
+ testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
+ )
+
+ // gen1 has multiple old strings of variable length. There is no
+ // overall non-empty common prefix, but some pairwise common prefixes.
+ gen1 := NewReplacer(
+ "aaa", "3[aaa]",
+ "aa", "2[aa]",
+ "a", "1[a]",
+ "i", "i",
+ "longerst", "most long",
+ "longer", "medium",
+ "long", "short",
+ "xx", "xx",
+ "x", "X",
+ "X", "Y",
+ "Y", "Z",
+ )
+ testCases = append(testCases,
+ testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
+ testCase{gen1, "long, longerst, longer", "short, most long, medium"},
+ testCase{gen1, "xxxxx", "xxxxX"},
+ testCase{gen1, "XiX", "YiY"},
+ testCase{gen1, "", ""},
+ )
+
+ // gen2 has multiple old strings with no pairwise common prefix.
+ gen2 := NewReplacer(
+ "roses", "red",
+ "violets", "blue",
+ "sugar", "sweet",
+ )
+ testCases = append(testCases,
+ testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
+ testCase{gen2, "", ""},
+ )
+
+ // gen3 has multiple old strings with an overall common prefix.
+ gen3 := NewReplacer(
+ "abracadabra", "poof",
+ "abracadabrakazam", "splat",
+ "abraham", "lincoln",
+ "abrasion", "scrape",
+ "abraham", "isaac",
+ )
+ testCases = append(testCases,
+ testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
+ testCase{gen3, "abrasion abracad", "scrape abracad"},
+ testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
+ testCase{gen3, "", ""},
+ )
+
+ // foo{1,2,3,4} have multiple old strings with an overall common prefix
+ // and 1- or 2- byte extensions from the common prefix.
+ foo1 := NewReplacer(
+ "foo1", "A",
+ "foo2", "B",
+ "foo3", "C",
+ )
+ foo2 := NewReplacer(
+ "foo1", "A",
+ "foo2", "B",
+ "foo31", "C",
+ "foo32", "D",
+ )
+ foo3 := NewReplacer(
+ "foo11", "A",
+ "foo12", "B",
+ "foo31", "C",
+ "foo32", "D",
+ )
+ foo4 := NewReplacer(
+ "foo12", "B",
+ "foo32", "D",
+ )
+ testCases = append(testCases,
+ testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
+ testCase{foo1, "", ""},
+
+ testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
+ testCase{foo2, "", ""},
+
+ testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
+ testCase{foo3, "", ""},
+
+ testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
+ testCase{foo4, "", ""},
+ )
+
+ // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
+ allBytes := make([]byte, 256)
+ for i := range allBytes {
+ allBytes[i] = byte(i)
+ }
+ allString := string(allBytes)
+ genAll := NewReplacer(
+ allString, "[all]",
+ "\xff", "[ff]",
+ "\x00", "[00]",
+ )
+ testCases = append(testCases,
+ testCase{genAll, allString, "[all]"},
+ testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
+ testCase{genAll, "", ""},
+ )
+
+ // Test cases with empty old strings.
+
+ blankToX1 := NewReplacer("", "X")
+ blankToX2 := NewReplacer("", "X", "", "")
+ blankHighPriority := NewReplacer("", "X", "o", "O")
+ blankLowPriority := NewReplacer("o", "O", "", "X")
+ blankNoOp1 := NewReplacer("", "")
+ blankNoOp2 := NewReplacer("", "", "", "A")
+ blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
+ testCases = append(testCases,
+ testCase{blankToX1, "foo", "XfXoXoX"},
+ testCase{blankToX1, "", "X"},
+
+ testCase{blankToX2, "foo", "XfXoXoX"},
+ testCase{blankToX2, "", "X"},
+
+ testCase{blankHighPriority, "oo", "XOXOX"},
+ testCase{blankHighPriority, "ii", "XiXiX"},
+ testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
+ testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
+ testCase{blankHighPriority, "", "X"},
+
+ testCase{blankLowPriority, "oo", "OOX"},
+ testCase{blankLowPriority, "ii", "XiXiX"},
+ testCase{blankLowPriority, "oiio", "OXiXiOX"},
+ testCase{blankLowPriority, "iooi", "XiOOXiX"},
+ testCase{blankLowPriority, "", "X"},
+
+ testCase{blankNoOp1, "foo", "foo"},
+ testCase{blankNoOp1, "", ""},
+
+ testCase{blankNoOp2, "foo", "foo"},
+ testCase{blankNoOp2, "", ""},
+
+ testCase{blankFoo, "foobarfoobaz", "XRXZX"},
+ testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
+ testCase{blankFoo, "", "X"},
+ )
+
+ // single string replacer
+
+ abcMatcher := NewReplacer("abc", "[match]")
+
+ testCases = append(testCases,
+ testCase{abcMatcher, "", ""},
+ testCase{abcMatcher, "ab", "ab"},
+ testCase{abcMatcher, "abc", "[match]"},
+ testCase{abcMatcher, "abcd", "[match]d"},
+ testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
+ )
+
+ // Issue 6659 cases (more single string replacer)
+
+ noHello := NewReplacer("Hello", "")
+ testCases = append(testCases,
+ testCase{noHello, "Hello", ""},
+ testCase{noHello, "Hellox", "x"},
+ testCase{noHello, "xHello", "x"},
+ testCase{noHello, "xHellox", "xx"},
+ )
+
+ // No-arg test cases.
+
+ nop := NewReplacer()
+ testCases = append(testCases,
+ testCase{nop, "abc", "abc"},
+ testCase{nop, "", ""},
+ )
+
+ // Run the test cases.
+ // A Replace mismatch is reported but does not stop the WriteString checks
+ // for the same case; a WriteString failure skips the remaining checks.
+
+ for i, tc := range testCases {
+ if s := tc.r.Replace(tc.in); s != tc.out {
+ t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
+ }
+ var buf bytes.Buffer
+ n, err := tc.r.WriteString(&buf, tc.in)
+ if err != nil {
+ t.Errorf("%d. WriteString: %v", i, err)
+ continue
+ }
+ got := buf.String()
+ if got != tc.out {
+ t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
+ continue
+ }
+ if n != len(tc.out) {
+ t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
+ i, tc.in, n, len(tc.out), tc.out)
+ }
+ }
+}
+
+var algorithmTestCases = []struct {
+ r *Replacer
+ want string
+}{
+ {capitalLetters, "*strings.byteReplacer"},
+ {htmlEscaper, "*strings.byteStringReplacer"},
+ {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
+ {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
+ {NewReplacer("", "X"), "*strings.genericReplacer"},
+ {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
+}
+
+// TestPickAlgorithm checks, for every entry of algorithmTestCases, that the
+// dynamic type of the implementation chosen by NewReplacer is the expected one.
+func TestPickAlgorithm(t *testing.T) {
+	for i := range algorithmTestCases {
+		tc := algorithmTestCases[i]
+		if got := fmt.Sprintf("%T", tc.r.Replacer()); got != tc.want {
+			t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
+		}
+	}
+}
+
+// errWriter is a writer whose Write method always fails.
+type errWriter struct{}
+
+// Write ignores p and reports zero bytes written together with an
+// "unwritable" error.
+func (errWriter) Write(p []byte) (n int, err error) {
+	err = fmt.Errorf("unwritable")
+	return 0, err
+}
+
+// TestWriteStringError verifies, for every replacer in algorithmTestCases,
+// that WriteString passes on the error returned by the underlying io.Writer
+// and reports that no bytes were written.
+func TestWriteStringError(t *testing.T) {
+	for i, tc := range algorithmTestCases {
+		n, err := tc.r.WriteString(errWriter{}, "abc")
+		if n == 0 && err != nil && err.Error() == "unwritable" {
+			continue
+		}
+		t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
+	}
+}
+
+// TestGenericTrieBuilding verifies the structure of the generated trie. There
+// is one node per line, and the key ending with the current line is in the
+// trie if it ends with a "+".
+//
+// Each case's in field is a ";"-separated list of old strings; out is the
+// expected PrintTrie dump, from which tab characters are removed before the
+// comparison.
+func TestGenericTrieBuilding(t *testing.T) {
+ testCases := []struct{ in, out string }{
+ {"abc;abdef;abdefgh;xx;xy;z", `-
+ a-
+ .b-
+ ..c+
+ ..d-
+ ...ef+
+ .....gh+
+ x-
+ .x+
+ .y+
+ z+
+ `},
+ {"abracadabra;abracadabrakazam;abraham;abrasion", `-
+ a-
+ .bra-
+ ....c-
+ .....adabra+
+ ...........kazam+
+ ....h-
+ .....am+
+ ....s-
+ .....ion+
+ `},
+ {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
+ X+
+ Y+
+ a+
+ .a+
+ ..a+
+ i+
+ l-
+ .ong+
+ ....er+
+ ......st+
+ x+
+ .x+
+ `},
+ {"foo;;foo;foo1", `+
+ f-
+ .oo+
+ ...1+
+ `},
+ }
+
+ for _, tc := range testCases {
+ keys := Split(tc.in, ";")
+ // args holds old/new pairs for NewReplacer. Only the old strings (the
+ // even indices) are filled in; every replacement stays empty.
+ args := make([]string, len(keys)*2)
+ for i, key := range keys {
+ args[i*2] = key
+ }
+
+ got := NewReplacer(args...).PrintTrie()
+ // Remove tabs from tc.out
+ wantbuf := make([]byte, 0, len(tc.out))
+ for i := 0; i < len(tc.out); i++ {
+ if tc.out[i] != '\t' {
+ wantbuf = append(wantbuf, tc.out[i])
+ }
+ }
+ want := string(wantbuf)
+
+ if got != want {
+ t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
+ }
+ }
+}
+
+// BenchmarkGenericNoMatch times Replace on input that contains none of the
+// replacer's old strings.
+func BenchmarkGenericNoMatch(b *testing.B) {
+	// The "12" -> "123" pair gives the old strings varying lengths, which
+	// forces the generic algorithm.
+	r := NewReplacer("a", "A", "b", "B", "12", "123")
+	input := Repeat("A", 100) + Repeat("B", 100)
+	for n := 0; n < b.N; n++ {
+		r.Replace(input)
+	}
+}
+
+// BenchmarkGenericMatch1 times Replace on input in which every byte matches
+// one of the replacer's 1-byte old strings.
+func BenchmarkGenericMatch1(b *testing.B) {
+	r := NewReplacer("a", "A", "b", "B", "12", "123")
+	input := Repeat("a", 100) + Repeat("b", 100)
+	for n := 0; n < b.N; n++ {
+		r.Replace(input)
+	}
+}
+
+// BenchmarkGenericMatch2 times htmlUnescaper.Replace on 100 repetitions of a
+// short escaped HTML snippet.
+func BenchmarkGenericMatch2(b *testing.B) {
+	input := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
+	for n := 0; n < b.N; n++ {
+		htmlUnescaper.Replace(input)
+	}
+}
+
+// benchmarkSingleString times Replace for a replacer that maps the single
+// old string pattern to "[match]", applied to text. Throughput is reported
+// relative to len(text); building the replacer is excluded from the timing.
+func benchmarkSingleString(b *testing.B, pattern, text string) {
+	replacer := NewReplacer(pattern, "[match]")
+	b.SetBytes(int64(len(text)))
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		replacer.Replace(text)
+	}
+}
+
+// BenchmarkSingleMaxSkipping searches 10000 'a' bytes for a pattern of 25 'b'
+// bytes, so no byte of the text occurs anywhere in the pattern.
+func BenchmarkSingleMaxSkipping(b *testing.B) {
+ benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
+}
+
+// BenchmarkSingleLongSuffixFail searches 1002 'a' bytes for "b" followed by
+// 500 'a' bytes: the text matches the pattern's long 'a' suffix everywhere
+// but never contains its leading 'b'.
+func BenchmarkSingleLongSuffixFail(b *testing.B) {
+ benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
+}
+
+// BenchmarkSingleMatch replaces "abcdef" in a text made of 1000 repetitions
+// of "abcdefghijklmno", so the pattern occurs once per repetition.
+func BenchmarkSingleMatch(b *testing.B) {
+ benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
+}
+
+// BenchmarkByteByteNoMatch times capitalLetters.Replace on input made only of
+// upper-case 'A' and 'B', which capitalLetters does not replace.
+func BenchmarkByteByteNoMatch(b *testing.B) {
+	input := Repeat("A", 100) + Repeat("B", 100)
+	for n := 0; n < b.N; n++ {
+		capitalLetters.Replace(input)
+	}
+}
+
+// BenchmarkByteByteMatch times capitalLetters.Replace on input in which every
+// byte is one of the replaced letters 'a' and 'b'.
+func BenchmarkByteByteMatch(b *testing.B) {
+	input := Repeat("a", 100) + Repeat("b", 100)
+	for n := 0; n < b.N; n++ {
+		capitalLetters.Replace(input)
+	}
+}
+
+// BenchmarkByteStringMatch times htmlEscaper.Replace on a 200-byte input whose
+// only escapable bytes are the leading "<" and the trailing ">".
+func BenchmarkByteStringMatch(b *testing.B) {
+	input := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
+	for n := 0; n < b.N; n++ {
+		htmlEscaper.Replace(input)
+	}
+}
+
+func BenchmarkHTMLEscapeNew(b *testing.B) {
+ str := "I <3 to escape HTML & other text too."
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.Replace(str)
+ }
+}
+
+func BenchmarkHTMLEscapeOld(b *testing.B) {
+ str := "I <3 to escape HTML & other text too."
+ for i := 0; i < b.N; i++ {
+ oldHTMLEscape(str)
+ }
+}
+
+func BenchmarkByteStringReplacerWriteString(b *testing.B) {
+ str := Repeat("I <3 to escape HTML & other text too.", 100)
+ buf := new(bytes.Buffer)
+ for i := 0; i < b.N; i++ {
+ htmlEscaper.WriteString(buf, str)
+ buf.Reset()
+ }
+}
+
+func BenchmarkByteReplacerWriteString(b *testing.B) {
+ str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
+ buf := new(bytes.Buffer)
+ for i := 0; i < b.N; i++ {
+ capitalLetters.WriteString(buf, str)
+ buf.Reset()
+ }
+}
+
+// BenchmarkByteByteReplaces is the comparison point for byteByteImpl: it does
+// the same a->A, b->B mapping with two chained Replace calls.
+func BenchmarkByteByteReplaces(b *testing.B) {
+	input := Repeat("a", 100) + Repeat("b", 100)
+	for n := 0; n < b.N; n++ {
+		upperA := Replace(input, "a", "A", -1)
+		Replace(upperA, "b", "B", -1)
+	}
+}
+
+// BenchmarkByteByteMap is the comparison point for byteByteImpl: it does the
+// same a->A, b->B mapping through Map with a rune-mapping function.
+func BenchmarkByteByteMap(b *testing.B) {
+	input := Repeat("a", 100) + Repeat("b", 100)
+	upper := func(r rune) rune {
+		if r == 'a' {
+			return 'A'
+		}
+		if r == 'b' {
+			return 'B'
+		}
+		return r
+	}
+	for n := 0; n < b.N; n++ {
+		Map(upper, input)
+	}
+}
+
+var mapdata = []struct{ name, data string }{
+ {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
+ {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
+}
+
+// BenchmarkMap times Map over every entry of mapdata, once with a mapping
+// that returns each rune unchanged ("identity") and once with a mapping that
+// upper-cases ASCII a-z and Greek α-ω ("change").
+func BenchmarkMap(b *testing.B) {
+	// bench runs Map(fn, data) for each mapdata entry as sub-benchmarks
+	// nested under name.
+	bench := func(name string, fn func(rune) rune) {
+		b.Run(name, func(b *testing.B) {
+			for _, md := range mapdata {
+				b.Run(md.name, func(b *testing.B) {
+					for i := 0; i < b.N; i++ {
+						Map(fn, md.data)
+					}
+				})
+			}
+		})
+	}
+
+	bench("identity", func(r rune) rune {
+		return r
+	})
+
+	bench("change", func(r rune) rune {
+		switch {
+		case 'a' <= r && r <= 'z':
+			return r + 'A' - 'a'
+		case 'α' <= r && r <= 'ω':
+			return r + 'Α' - 'α'
+		}
+		return r
+	})
+}
diff --git a/src/strings/search.go b/src/strings/search.go
new file mode 100644
index 0000000..e5bffbb
--- /dev/null
+++ b/src/strings/search.go
@@ -0,0 +1,124 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+// stringFinder efficiently finds strings in a source text. It's implemented
+// using the Boyer-Moore string search algorithm:
+// https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
+// https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
+// document uses 1-based indexing)
+type stringFinder struct {
+ // pattern is the string that we are searching for in the text.
+ pattern string
+
+ // badCharSkip[b] contains the distance between the last byte of pattern
+ // and the rightmost occurrence of b in pattern. If b is not in pattern,
+ // badCharSkip[b] is len(pattern).
+ //
+ // Whenever a mismatch is found with byte b in the text, we can safely
+ // shift the matching frame at least badCharSkip[b] until the next time
+ // the matching char could be in alignment.
+ badCharSkip [256]int
+
+ // goodSuffixSkip[i] defines how far we can shift the matching frame given
+ // that the suffix pattern[i+1:] matches, but the byte pattern[i] does
+ // not. There are two cases to consider:
+ //
+ // 1. The matched suffix occurs elsewhere in pattern (with a different
+ // byte preceding it that we might possibly match). In this case, we can
+ // shift the matching frame to align with the next suffix chunk. For
+ // example, the pattern "mississi" has the suffix "issi" next occurring
+ // (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
+ // shift+len(suffix) == 3+4 == 7.
+ //
+ // 2. If the matched suffix does not occur elsewhere in pattern, then the
+ // matching frame may share part of its prefix with the end of the
+ // matching suffix. In this case, goodSuffixSkip[i] will contain how far
+ // to shift the frame to align this portion of the prefix to the
+ // suffix. For example, in the pattern "abcxxxabc", when the first
+ // mismatch from the back is found to be in position 3, the matching
+ // suffix "xxabc" is not found elsewhere in the pattern. However, its
+ // rightmost "abc" (at position 6) is a prefix of the whole pattern, so
+ // goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
+ goodSuffixSkip []int
+}
+
+// makeStringFinder returns a stringFinder for pattern with its badCharSkip and
+// goodSuffixSkip tables filled in. The next method consults both tables
+// whenever a comparison fails.
+func makeStringFinder(pattern string) *stringFinder {
+ f := &stringFinder{
+ pattern: pattern,
+ goodSuffixSkip: make([]int, len(pattern)),
+ }
+ // last is the index of the last character in the pattern.
+ last := len(pattern) - 1
+
+ // Build bad character table.
+ // Bytes not in the pattern can skip one pattern's length.
+ for i := range f.badCharSkip {
+ f.badCharSkip[i] = len(pattern)
+ }
+ // The loop condition is < instead of <= so that the last byte does not
+ // have a zero distance to itself. Finding this byte out of place implies
+ // that it is not in the last position.
+ // Later (more rightward) occurrences overwrite earlier ones, so each entry
+ // ends up holding the distance from the rightmost occurrence.
+ for i := 0; i < last; i++ {
+ f.badCharSkip[pattern[i]] = last - i
+ }
+
+ // Build good suffix table.
+ // First pass: set each value to the next index which starts a prefix of
+ // pattern.
+ // On the first iteration (i == last) pattern[i+1:] is empty, which is a
+ // prefix of any string, so lastPrefix immediately becomes len(pattern).
+ lastPrefix := last
+ for i := last; i >= 0; i-- {
+ if HasPrefix(pattern, pattern[i+1:]) {
+ lastPrefix = i + 1
+ }
+ // lastPrefix is the shift, and (last-i) is len(suffix).
+ f.goodSuffixSkip[i] = lastPrefix + last - i
+ }
+ // Second pass: find repeats of pattern's suffix starting from the front.
+ for i := 0; i < last; i++ {
+ // lenSuffix is at most i (the length of pattern[1:i+1]), so the index
+ // i-lenSuffix below is never negative.
+ lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
+ if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
+ // (last-i) is the shift, and lenSuffix is len(suffix).
+ f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
+ }
+ }
+
+ return f
+}
+
+// longestCommonSuffix returns the number of trailing bytes that a and b have
+// in common. The result never exceeds the length of the shorter string.
+func longestCommonSuffix(a, b string) (i int) {
+	limit := len(a)
+	if len(b) < limit {
+		limit = len(b)
+	}
+	for i < limit && a[len(a)-1-i] == b[len(b)-1-i] {
+		i++
+	}
+	return i
+}
+
+// next reports the index in text at which the pattern first occurs, or -1
+// when the pattern does not occur in text.
+func (f *stringFinder) next(text string) int {
+	n := len(f.pattern)
+	// i is the position in text currently lined up with the pattern byte at j.
+	for i := n - 1; i < len(text); {
+		// Walk backwards from the end of the pattern while the bytes agree.
+		j := n - 1
+		for ; j >= 0 && text[i] == f.pattern[j]; j-- {
+			i--
+		}
+		if j < 0 {
+			// The whole pattern matched; i now sits one byte before the match.
+			return i + 1
+		}
+		// Mismatch: advance by the larger of the two precomputed skips.
+		i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j])
+	}
+	return -1
+}
+
+// max returns the larger of a and b.
+func max(a, b int) int {
+	if b >= a {
+		return b
+	}
+	return a
+}
diff --git a/src/strings/search_test.go b/src/strings/search_test.go
new file mode 100644
index 0000000..c01a393
--- /dev/null
+++ b/src/strings/search_test.go
@@ -0,0 +1,90 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "reflect"
+ . "strings"
+ "testing"
+)
+
+func TestFinderNext(t *testing.T) {
+ testCases := []struct {
+ pat, text string
+ index int
+ }{
+ {"", "", 0},
+ {"", "abc", 0},
+ {"abc", "", -1},
+ {"abc", "abc", 0},
+ {"d", "abcdefg", 3},
+ {"nan", "banana", 2},
+ {"pan", "anpanman", 2},
+ {"nnaaman", "anpanmanam", -1},
+ {"abcd", "abc", -1},
+ {"abcd", "bcd", -1},
+ {"bcd", "abcd", 1},
+ {"abc", "acca", -1},
+ {"aa", "aaa", 0},
+ {"baa", "aaaaa", -1},
+ {"at that", "which finally halts. at that point", 22},
+ }
+
+ for _, tc := range testCases {
+ got := StringFind(tc.pat, tc.text)
+ want := tc.index
+ if got != want {
+ t.Errorf("stringFind(%q, %q) got %d, want %d\n", tc.pat, tc.text, got, want)
+ }
+ }
+}
+
+func TestFinderCreation(t *testing.T) {
+ testCases := []struct {
+ pattern string
+ bad [256]int
+ suf []int
+ }{
+ {
+ "abc",
+ [256]int{'a': 2, 'b': 1, 'c': 3},
+ []int{5, 4, 1},
+ },
+ {
+ "mississi",
+ [256]int{'i': 3, 'm': 7, 's': 1},
+ []int{15, 14, 13, 7, 11, 10, 7, 1},
+ },
+ // From https://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
+ {
+ "abcxxxabc",
+ [256]int{'a': 2, 'b': 1, 'c': 6, 'x': 3},
+ []int{14, 13, 12, 11, 10, 9, 11, 10, 1},
+ },
+ {
+ "abyxcdeyx",
+ [256]int{'a': 8, 'b': 7, 'c': 4, 'd': 3, 'e': 2, 'y': 1, 'x': 5},
+ []int{17, 16, 15, 14, 13, 12, 7, 10, 1},
+ },
+ }
+
+ for _, tc := range testCases {
+ bad, good := DumpTables(tc.pattern)
+
+ for i, got := range bad {
+ want := tc.bad[i]
+ if want == 0 {
+ want = len(tc.pattern)
+ }
+ if got != want {
+ t.Errorf("boyerMoore(%q) bad['%c']: got %d want %d", tc.pattern, i, got, want)
+ }
+ }
+
+ if !reflect.DeepEqual(good, tc.suf) {
+ t.Errorf("boyerMoore(%q) got %v want %v", tc.pattern, good, tc.suf)
+ }
+ }
+}
diff --git a/src/strings/strings.go b/src/strings/strings.go
new file mode 100644
index 0000000..2dd4321
--- /dev/null
+++ b/src/strings/strings.go
@@ -0,0 +1,1305 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package strings implements simple functions to manipulate UTF-8 encoded strings.
+//
+// For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
+package strings
+
+import (
+ "internal/bytealg"
+ "unicode"
+ "unicode/utf8"
+)
+
+const maxInt = int(^uint(0) >> 1)
+
+// explode cuts s into single-character UTF-8 strings. At most n pieces are
+// produced (n < 0 means no limit); when the limit is reached, the final piece
+// holds the whole remainder of s. Each invalid UTF-8 byte becomes a piece of
+// its own.
+func explode(s string, n int) []string {
+	if total := utf8.RuneCountInString(s); n < 0 || n > total {
+		n = total
+	}
+	parts := make([]string, n)
+	if n == 0 {
+		return parts
+	}
+	final := n - 1
+	for i := 0; i < final; i++ {
+		_, width := utf8.DecodeRuneInString(s)
+		parts[i], s = s[:width], s[width:]
+	}
+	// Whatever is left, one character or an unsplit remainder, goes last.
+	parts[final] = s
+	return parts
+}
+
+// Count counts the number of non-overlapping instances of substr in s.
+// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
+func Count(s, substr string) int {
+	switch len(substr) {
+	case 0:
+		// special case
+		return utf8.RuneCountInString(s) + 1
+	case 1:
+		return bytealg.CountString(s, substr[0])
+	}
+	count := 0
+	// After each hit, resume the search just past the matched text so that
+	// overlapping instances are not counted.
+	for i := Index(s, substr); i != -1; i = Index(s, substr) {
+		count++
+		s = s[i+len(substr):]
+	}
+	return count
+}
+
+// Contains reports whether substr is within s.
+// It is true exactly when Index(s, substr) is non-negative.
+func Contains(s, substr string) bool {
+ return Index(s, substr) >= 0
+}
+
+// ContainsAny reports whether any Unicode code points in chars are within s.
+// It is true exactly when IndexAny(s, chars) is non-negative.
+func ContainsAny(s, chars string) bool {
+ return IndexAny(s, chars) >= 0
+}
+
+// ContainsRune reports whether the Unicode code point r is within s.
+// It is true exactly when IndexRune(s, r) is non-negative.
+func ContainsRune(s string, r rune) bool {
+ return IndexRune(s, r) >= 0
+}
+
+// ContainsFunc reports whether any Unicode code points r within s satisfy f(r).
+// It is true exactly when IndexFunc(s, f) is non-negative.
+func ContainsFunc(s string, f func(rune) bool) bool {
+ return IndexFunc(s, f) >= 0
+}
+
+// LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
+// An empty substr is reported at len(s).
+func LastIndex(s, substr string) int {
+ n := len(substr)
+ switch {
+ case n == 0:
+ return len(s)
+ case n == 1:
+ return LastIndexByte(s, substr[0])
+ case n == len(s):
+ // Equal lengths: the only possible match is the whole string.
+ if substr == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ }
+ // Rabin-Karp search from the end of the string
+ // NOTE(review): hashss is presumably the hash of substr taken in reverse
+ // byte order and pow the multiplier for the byte leaving the window;
+ // confirm against bytealg.HashStrRev.
+ hashss, pow := bytealg.HashStrRev(substr)
+ last := len(s) - n
+ // Hash the rightmost window s[last:], folding bytes in from the end.
+ var h uint32
+ for i := len(s) - 1; i >= last; i-- {
+ h = h*bytealg.PrimeRK + uint32(s[i])
+ }
+ // A hash match is only a candidate; the string comparison confirms it.
+ if h == hashss && s[last:] == substr {
+ return last
+ }
+ // Slide the window one byte to the left per iteration: bring in s[i] and
+ // drop s[i+n], the byte that has just left the window on the right.
+ for i := last - 1; i >= 0; i-- {
+ h *= bytealg.PrimeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i+n])
+ if h == hashss && s[i:i+n] == substr {
+ return i
+ }
+ }
+ return -1
+}
+
+// IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
+// The search itself is delegated to bytealg.IndexByteString.
+func IndexByte(s string, c byte) int {
+ return bytealg.IndexByteString(s, c)
+}
+
+// IndexRune returns the index of the first instance of the Unicode code point
+// r, or -1 if rune is not present in s.
+// If r is utf8.RuneError, it returns the first instance of any
+// invalid UTF-8 byte sequence.
+func IndexRune(s string, r rune) int {
+	if 0 <= r && r < utf8.RuneSelf {
+		// Single-byte rune: a byte search is enough.
+		return IndexByte(s, byte(r))
+	}
+	if r == utf8.RuneError {
+		// Ranging over s yields utf8.RuneError for invalid UTF-8 as well as
+		// for an encoded U+FFFD.
+		for i, c := range s {
+			if c == utf8.RuneError {
+				return i
+			}
+		}
+		return -1
+	}
+	if !utf8.ValidRune(r) {
+		return -1
+	}
+	return Index(s, string(r))
+}
+
+// IndexAny returns the index of the first instance of any Unicode code point
+// from chars in s, or -1 if no Unicode code point from chars is present in s.
+func IndexAny(s, chars string) int {
+	switch len(chars) {
+	case 0:
+		// Avoid scanning all of s.
+		return -1
+	case 1:
+		// Avoid scanning all of s.
+		// A lone byte >= utf8.RuneSelf is not valid UTF-8 by itself, so it
+		// is looked up as utf8.RuneError.
+		r := rune(chars[0])
+		if r >= utf8.RuneSelf {
+			r = utf8.RuneError
+		}
+		return IndexRune(s, r)
+	}
+	if len(s) > 8 {
+		// For longer inputs, try a byte-wise scan against an ASCII set.
+		if set, ok := makeASCIISet(chars); ok {
+			for i := 0; i < len(s); i++ {
+				if set.contains(s[i]) {
+					return i
+				}
+			}
+			return -1
+		}
+	}
+	for i, r := range s {
+		if IndexRune(chars, r) >= 0 {
+			return i
+		}
+	}
+	return -1
+}
+
+// LastIndexAny returns the index of the last instance of any Unicode code
+// point from chars in s, or -1 if no Unicode code point from chars is
+// present in s.
+func LastIndexAny(s, chars string) int {
+	if len(chars) == 0 {
+		// Avoid scanning all of s.
+		return -1
+	}
+	if len(s) == 1 {
+		// A lone byte >= utf8.RuneSelf is not valid UTF-8 by itself, so it
+		// is looked up as utf8.RuneError.
+		r := rune(s[0])
+		if r >= utf8.RuneSelf {
+			r = utf8.RuneError
+		}
+		if IndexRune(chars, r) < 0 {
+			return -1
+		}
+		return 0
+	}
+	if len(s) > 8 {
+		// For longer inputs, try a byte-wise backwards scan against an ASCII set.
+		if set, ok := makeASCIISet(chars); ok {
+			for i := len(s) - 1; i >= 0; i-- {
+				if set.contains(s[i]) {
+					return i
+				}
+			}
+			return -1
+		}
+	}
+	if len(chars) == 1 {
+		want := rune(chars[0])
+		if want >= utf8.RuneSelf {
+			want = utf8.RuneError
+		}
+		// Decode s backwards one rune at a time; end is the exclusive end
+		// of the part of s still to be examined.
+		for end := len(s); end > 0; {
+			r, size := utf8.DecodeLastRuneInString(s[:end])
+			end -= size
+			if r == want {
+				return end
+			}
+		}
+		return -1
+	}
+	for end := len(s); end > 0; {
+		r, size := utf8.DecodeLastRuneInString(s[:end])
+		end -= size
+		if IndexRune(chars, r) >= 0 {
+			return end
+		}
+	}
+	return -1
+}
+
+// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
+func LastIndexByte(s string, c byte) int {
+	// Scan from the final byte towards the front.
+	i := len(s)
+	for i > 0 {
+		i--
+		if s[i] == c {
+			return i
+		}
+	}
+	return -1
+}
+
+// Generic split: splits after each instance of sep,
+// including sepSave bytes of sep in the subarrays.
+// n bounds the number of pieces: 0 yields nil, a negative n means no limit,
+// and an empty sep defers to explode.
+func genSplit(s, sep string, sepSave, n int) []string {
+	switch {
+	case n == 0:
+		return nil
+	case sep == "":
+		return explode(s, n)
+	case n < 0:
+		n = Count(s, sep) + 1
+	}
+
+	// Cap a large caller-supplied n at len(s)+1 before allocating.
+	if limit := len(s) + 1; n > limit {
+		n = limit
+	}
+	pieces := make([]string, n)
+	filled := 0
+	// The final slot is kept for the remainder, so cut at most n-1 times.
+	for ; filled < n-1; filled++ {
+		m := Index(s, sep)
+		if m < 0 {
+			break
+		}
+		pieces[filled] = s[:m+sepSave]
+		s = s[m+len(sep):]
+	}
+	pieces[filled] = s
+	return pieces[:filled+1]
+}
+
+// SplitN slices s into substrings separated by sep and returns a slice of
+// the substrings between those separators.
+//
+// The count determines the number of substrings to return:
+//
+// n > 0: at most n substrings; the last substring will be the unsplit remainder.
+// n == 0: the result is nil (zero substrings)
+// n < 0: all substrings
+//
+// Edge cases for s and sep (for example, empty strings) are handled
+// as described in the documentation for Split.
+//
+// To split around the first instance of a separator, see Cut.
+//
+// It calls genSplit with sepSave 0, so no byte of sep is kept in the results.
+func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
+
+// SplitAfterN slices s into substrings after each instance of sep and
+// returns a slice of those substrings.
+//
+// The count determines the number of substrings to return:
+//
+// n > 0: at most n substrings; the last substring will be the unsplit remainder.
+// n == 0: the result is nil (zero substrings)
+// n < 0: all substrings
+//
+// Edge cases for s and sep (for example, empty strings) are handled
+// as described in the documentation for SplitAfter.
+//
+// It calls genSplit with sepSave len(sep), so each substring that was cut
+// keeps its trailing separator.
+func SplitAfterN(s, sep string, n int) []string {
+ return genSplit(s, sep, len(sep), n)
+}
+
+// Split slices s into all substrings separated by sep and returns a slice of
+// the substrings between those separators.
+//
+// If s does not contain sep and sep is not empty, Split returns a
+// slice of length 1 whose only element is s.
+//
+// If sep is empty, Split splits after each UTF-8 sequence. If both s
+// and sep are empty, Split returns an empty slice.
+//
+// It is equivalent to SplitN with a count of -1.
+//
+// To split around the first instance of a separator, see Cut.
+//
+// It calls genSplit with sepSave 0, so no byte of sep is kept in the results.
+func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
+
+// SplitAfter slices s into all substrings after each instance of sep and
+// returns a slice of those substrings.
+//
+// If s does not contain sep and sep is not empty, SplitAfter returns
+// a slice of length 1 whose only element is s.
+//
+// If sep is empty, SplitAfter splits after each UTF-8 sequence. If
+// both s and sep are empty, SplitAfter returns an empty slice.
+//
+// It is equivalent to SplitAfterN with a count of -1.
+//
+// It calls genSplit with sepSave len(sep), so each substring that was cut
+// keeps its trailing separator.
+func SplitAfter(s, sep string) []string {
+ return genSplit(s, sep, len(sep), -1)
+}
+
+var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
+
+// Fields splits the string s around each instance of one or more consecutive white space
+// characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
+// empty slice if s contains only white space.
+//
+// Inputs made only of ASCII bytes are split in place using the asciiSpace
+// table; any other input is handed to FieldsFunc with unicode.IsSpace.
+func Fields(s string) []string {
+ // First count the fields.
+ // This is an exact count if s is ASCII, otherwise it is an approximation.
+ n := 0
+ wasSpace := 1
+ // setBits is used to track which bits are set in the bytes of s.
+ setBits := uint8(0)
+ for i := 0; i < len(s); i++ {
+ r := s[i]
+ setBits |= r
+ isSpace := int(asciiSpace[r])
+ // isSpace and wasSpace are each 0 or 1, so this adds 1 exactly when a
+ // non-space byte follows a space (or starts the string): a field start.
+ n += wasSpace & ^isSpace
+ wasSpace = isSpace
+ }
+
+ // setBits is the OR of every byte, so it reaches utf8.RuneSelf only if
+ // some byte has its high bit set.
+ if setBits >= utf8.RuneSelf {
+ // Some runes in the input string are not ASCII.
+ return FieldsFunc(s, unicode.IsSpace)
+ }
+ // ASCII fast path
+ // n is exact here, so a is allocated at its final length.
+ a := make([]string, n)
+ na := 0
+ fieldStart := 0
+ i := 0
+ // Skip spaces in the front of the input.
+ for i < len(s) && asciiSpace[s[i]] != 0 {
+ i++
+ }
+ fieldStart = i
+ for i < len(s) {
+ if asciiSpace[s[i]] == 0 {
+ i++
+ continue
+ }
+ // s[i] is a space: the field that began at fieldStart ends here.
+ a[na] = s[fieldStart:i]
+ na++
+ i++
+ // Skip spaces in between fields.
+ for i < len(s) && asciiSpace[s[i]] != 0 {
+ i++
+ }
+ fieldStart = i
+ }
+ if fieldStart < len(s) { // Last field might end at EOF.
+ a[na] = s[fieldStart:]
+ }
+ return a
+}
+
+// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
+// and returns an array of slices of s. If all code points in s satisfy f(c) or the
+// string is empty, an empty slice is returned.
+//
+// FieldsFunc makes no guarantees about the order in which it calls f(c)
+// and assumes that f always returns the same value for a given c.
+func FieldsFunc(s string, f func(rune) bool) []string {
+ // A span is used to record a slice of s of the form s[start:end].
+ // The start index is inclusive and the end index is exclusive.
+ type span struct {
+ start int
+ end int
+ }
+ spans := make([]span, 0, 32)
+
+ // Find the field start and end indices.
+ // Doing this in a separate pass (rather than slicing the string s
+ // and collecting the result substrings right away) is significantly
+ // more efficient, possibly due to cache effects.
+ start := -1 // valid span start if >= 0
+ for end, rune := range s {
+ if f(rune) {
+ if start >= 0 {
+ spans = append(spans, span{start, end})
+ // Set start to a negative value.
+ // Note: using -1 here consistently and reproducibly
+ // slows down this code by several percent on amd64.
+ // For start >= 0, ^start is -(start+1), which is always negative.
+ start = ^start
+ }
+ } else {
+ if start < 0 {
+ // First non-separator rune after a separator run: a field begins.
+ start = end
+ }
+ }
+ }
+
+ // Last field might end at EOF.
+ if start >= 0 {
+ spans = append(spans, span{start, len(s)})
+ }
+
+ // Create strings from recorded field indices.
+ a := make([]string, len(spans))
+ for i, span := range spans {
+ a[i] = s[span.start:span.end]
+ }
+
+ return a
+}
+
+// Join glues the strings in elems into one string, inserting sep between
+// each pair of neighbouring elements.
+//
+// It panics if the length of the result would overflow.
+func Join(elems []string, sep string) string {
+	if len(elems) == 0 {
+		return ""
+	}
+	if len(elems) == 1 {
+		return elems[0]
+	}
+
+	// Work out the exact output size up front so the builder allocates once.
+	gaps := len(elems) - 1
+	total := 0
+	if len(sep) > 0 {
+		if len(sep) >= maxInt/gaps {
+			panic("strings: Join output length overflow")
+		}
+		total = len(sep) * gaps
+	}
+	for i := range elems {
+		if len(elems[i]) > maxInt-total {
+			panic("strings: Join output length overflow")
+		}
+		total += len(elems[i])
+	}
+
+	var out Builder
+	out.Grow(total)
+	for i, e := range elems {
+		if i > 0 {
+			out.WriteString(sep)
+		}
+		out.WriteString(e)
+	}
+	return out.String()
+}
+
+// HasPrefix reports whether s starts with prefix.
+func HasPrefix(s, prefix string) bool {
+	if len(prefix) > len(s) {
+		return false
+	}
+	return s[:len(prefix)] == prefix
+}
+
+// HasSuffix reports whether s finishes with suffix.
+func HasSuffix(s, suffix string) bool {
+	if len(suffix) > len(s) {
+		return false
+	}
+	return s[len(s)-len(suffix):] == suffix
+}
+
+// Map returns a copy of the string s with all its characters modified
+// according to the mapping function. If mapping returns a negative value, the character is
+// dropped from the string with no replacement.
+//
+// mapping is called once for each rune of s, in order. If no rune is
+// changed, s itself is returned and nothing is allocated.
+func Map(mapping func(rune) rune, s string) string {
+ // In the worst case, the string can grow when mapped, making
+ // things unpleasant. But it's so rare we barge in assuming it's
+ // fine. It could also shrink but that falls out naturally.
+
+ // The output buffer b is initialized on demand, the first
+ // time a character differs.
+ var b Builder
+
+ // Phase 1: scan for the first rune that mapping changes.
+ for i, c := range s {
+ r := mapping(c)
+ if r == c && c != utf8.RuneError {
+ continue
+ }
+
+ // c is either changed by mapping or equals RuneError. In the latter
+ // case decode again: width 1 means the input held an invalid byte
+ // rather than a real U+FFFD, and that byte must be rewritten even if
+ // mapping returned RuneError unchanged.
+ var width int
+ if c == utf8.RuneError {
+ c, width = utf8.DecodeRuneInString(s[i:])
+ if width != 1 && r == c {
+ continue
+ }
+ } else {
+ width = utf8.RuneLen(c)
+ }
+
+ // First difference found: copy the untouched prefix, emit the mapped
+ // rune (unless it is dropped) and leave the remainder in s for phase 2.
+ b.Grow(len(s) + utf8.UTFMax)
+ b.WriteString(s[:i])
+ if r >= 0 {
+ b.WriteRune(r)
+ }
+
+ s = s[i+width:]
+ break
+ }
+
+ // Fast path for unchanged input
+ if b.Cap() == 0 { // didn't call b.Grow above
+ return s
+ }
+
+ // Phase 2: map every remaining rune straight into the buffer.
+ for _, c := range s {
+ r := mapping(c)
+
+ if r >= 0 {
+ // common case
+ // Due to inlining, it is more performant to determine if WriteByte should be
+ // invoked rather than always call WriteRune
+ if r < utf8.RuneSelf {
+ b.WriteByte(byte(r))
+ } else {
+ // r is not an ASCII rune.
+ b.WriteRune(r)
+ }
+ }
+ }
+
+ return b.String()
+}
+
+// Repeat returns a new string made of count copies of s laid end to end.
+//
+// It panics if count is negative or if the result of (len(s) * count)
+// overflows.
+func Repeat(s string, count int) string {
+	if count == 0 {
+		return ""
+	}
+	if count == 1 {
+		return s
+	}
+
+	// There is no error return, so a request that cannot be satisfied
+	// has to panic. See golang.org/issue/16237.
+	if count < 0 {
+		panic("strings: negative Repeat count")
+	}
+	if len(s) >= maxInt/count {
+		panic("strings: Repeat output length overflow")
+	}
+	total := len(s) * count
+
+	if len(s) == 0 {
+		return ""
+	}
+
+	// The output is grown by appending what has already been written.
+	// Beyond an empirically chosen size (8KB) a larger copy source stops
+	// paying off because it no longer stays in the L1 data cache, so for
+	// big results the copy source is capped at a whole number of copies
+	// of s that fits the limit (or one copy of s if s alone exceeds it).
+	// The original authors report up to +100% speed for large results.
+	const chunkLimit = 8 * 1024
+	maxChunk := total
+	if total > chunkLimit {
+		maxChunk = chunkLimit / len(s) * len(s)
+		if maxChunk == 0 {
+			maxChunk = len(s)
+		}
+	}
+
+	var out Builder
+	out.Grow(total)
+	out.WriteString(s)
+	for written := out.Len(); written < total; written = out.Len() {
+		// Copy as much as is still missing, but never more than what is
+		// already in the buffer and never more than the chunk cap.
+		step := total - written
+		if step > written {
+			step = written
+		}
+		if step > maxChunk {
+			step = maxChunk
+		}
+		out.WriteString(out.String()[:step])
+	}
+	return out.String()
+}
+
+// ToUpper returns s with every Unicode letter replaced by its upper case.
+func ToUpper(s string) string {
+	// Scan once: bail out to the general path on the first non-ASCII
+	// byte, and note whether any ASCII lower-case letter is present.
+	needsWork := false
+	for i := 0; i < len(s); i++ {
+		b := s[i]
+		if b >= utf8.RuneSelf {
+			return Map(unicode.ToUpper, s)
+		}
+		if 'a' <= b && b <= 'z' {
+			needsWork = true
+		}
+	}
+	if !needsWork {
+		// Pure ASCII with nothing to convert: hand back s unchanged.
+		return s
+	}
+
+	// ASCII-only conversion: copy unchanged stretches in bulk and write
+	// each converted letter individually.
+	var out Builder
+	out.Grow(len(s))
+	copied := 0 // s[:copied] has already been written to out
+	for i := 0; i < len(s); i++ {
+		b := s[i]
+		if b < 'a' || b > 'z' {
+			continue
+		}
+		if copied < i {
+			out.WriteString(s[copied:i])
+		}
+		out.WriteByte(b - ('a' - 'A'))
+		copied = i + 1
+	}
+	if copied < len(s) {
+		out.WriteString(s[copied:])
+	}
+	return out.String()
+}
+
+// ToLower returns s with every Unicode letter replaced by its lower case.
+func ToLower(s string) string {
+	// Scan once: bail out to the general path on the first non-ASCII
+	// byte, and note whether any ASCII upper-case letter is present.
+	needsWork := false
+	for i := 0; i < len(s); i++ {
+		b := s[i]
+		if b >= utf8.RuneSelf {
+			return Map(unicode.ToLower, s)
+		}
+		if 'A' <= b && b <= 'Z' {
+			needsWork = true
+		}
+	}
+	if !needsWork {
+		// Pure ASCII with nothing to convert: hand back s unchanged.
+		return s
+	}
+
+	// ASCII-only conversion: copy unchanged stretches in bulk and write
+	// each converted letter individually.
+	var out Builder
+	out.Grow(len(s))
+	copied := 0 // s[:copied] has already been written to out
+	for i := 0; i < len(s); i++ {
+		b := s[i]
+		if b < 'A' || b > 'Z' {
+			continue
+		}
+		if copied < i {
+			out.WriteString(s[copied:i])
+		}
+		out.WriteByte(b + ('a' - 'A'))
+		copied = i + 1
+	}
+	if copied < len(s) {
+		out.WriteString(s[copied:])
+	}
+	return out.String()
+}
+
+// ToTitle returns a copy of s in which every Unicode letter is replaced
+// by its Unicode title case.
+func ToTitle(s string) string {
+	return Map(unicode.ToTitle, s)
+}
+
+// ToUpperSpecial returns a copy of s in which every Unicode letter is
+// replaced by its upper case as defined by the case mapping c.
+func ToUpperSpecial(c unicode.SpecialCase, s string) string {
+	upper := c.ToUpper
+	return Map(upper, s)
+}
+
+// ToLowerSpecial returns a copy of s in which every Unicode letter is
+// replaced by its lower case as defined by the case mapping c.
+func ToLowerSpecial(c unicode.SpecialCase, s string) string {
+	lower := c.ToLower
+	return Map(lower, s)
+}
+
+// ToTitleSpecial returns a copy of s in which every Unicode letter is
+// replaced by its Unicode title case, with the special casing rules of c
+// taking priority.
+func ToTitleSpecial(c unicode.SpecialCase, s string) string {
+	title := c.ToTitle
+	return Map(title, s)
+}
+
+// ToValidUTF8 returns a copy of s in which each run of invalid UTF-8 bytes
+// is replaced by replacement, which may be empty. If s contains no invalid
+// bytes it is returned unchanged.
+func ToValidUTF8(s, replacement string) string {
+	var out Builder
+
+	// Look for the first invalid byte. A decoded RuneError of width 1 is
+	// an invalid byte; a wider one is a real U+FFFD in the input.
+	for pos, r := range s {
+		if r != utf8.RuneError {
+			continue
+		}
+		if _, size := utf8.DecodeRuneInString(s[pos:]); size == 1 {
+			out.Grow(len(s) + len(replacement))
+			out.WriteString(s[:pos])
+			s = s[pos:]
+			break
+		}
+	}
+
+	// Nothing invalid was found (Grow was never called): s is already valid.
+	if out.Cap() == 0 {
+		return s
+	}
+
+	inBadRun := false // the previous byte belonged to an invalid sequence
+	for pos := 0; pos < len(s); {
+		if ch := s[pos]; ch < utf8.RuneSelf {
+			out.WriteByte(ch)
+			inBadRun = false
+			pos++
+			continue
+		}
+		_, size := utf8.DecodeRuneInString(s[pos:])
+		if size != 1 {
+			// A well-formed multi-byte rune is copied verbatim.
+			out.WriteString(s[pos : pos+size])
+			inBadRun = false
+			pos += size
+			continue
+		}
+		// Invalid byte: emit the replacement once per run.
+		if !inBadRun {
+			inBadRun = true
+			out.WriteString(replacement)
+		}
+		pos++
+	}
+
+	return out.String()
+}
+
+// isSeparator reports whether r may be treated as a word boundary.
+// TODO: update when package unicode captures more of the properties.
+func isSeparator(r rune) bool {
+	if r <= 0x7F {
+		// In the ASCII range everything except letters, digits and the
+		// underscore separates words.
+		isWord := r == '_' ||
+			('0' <= r && r <= '9') ||
+			('a' <= r && r <= 'z') ||
+			('A' <= r && r <= 'Z')
+		return !isWord
+	}
+	// Beyond ASCII, letters and digits never separate ...
+	if unicode.IsLetter(r) || unicode.IsDigit(r) {
+		return false
+	}
+	// ... and of everything else only white space is recognised for now.
+	return unicode.IsSpace(r)
+}
+
+// Title returns a copy of s in which every Unicode letter that starts a
+// word is replaced by its Unicode title case.
+//
+// Deprecated: The rule Title uses for word boundaries does not handle Unicode
+// punctuation properly. Use golang.org/x/text/cases instead.
+func Title(s string) string {
+	// The closure carries the previously seen rune across calls. This
+	// relies on Map visiting the runes in order, exactly once each.
+	last := ' '
+	titleAfterSeparator := func(r rune) rune {
+		atBoundary := isSeparator(last)
+		last = r
+		if atBoundary {
+			return unicode.ToTitle(r)
+		}
+		return r
+	}
+	return Map(titleAfterSeparator, s)
+}
+
+// TrimLeftFunc returns the part of s that remains after dropping every
+// leading Unicode code point c for which f(c) is true.
+func TrimLeftFunc(s string, f func(rune) bool) string {
+	// Find the first code point that does NOT satisfy f.
+	if first := indexFunc(s, f, false); first != -1 {
+		return s[first:]
+	}
+	return ""
+}
+
+// TrimRightFunc returns the part of s that remains after dropping every
+// trailing Unicode code point c for which f(c) is true.
+func TrimRightFunc(s string, f func(rune) bool) string {
+	// last is the start of the final code point that does NOT satisfy f,
+	// or -1 if there is none.
+	last := lastIndexFunc(s, f, false)
+	if last < 0 || s[last] < utf8.RuneSelf {
+		// Nothing to keep, or the kept code point is a single byte.
+		return s[:last+1]
+	}
+	// Keep the whole multi-byte code point.
+	_, size := utf8.DecodeRuneInString(s[last:])
+	return s[:last+size]
+}
+
+// TrimFunc returns the part of s that remains after dropping every leading
+// and trailing Unicode code point c for which f(c) is true.
+func TrimFunc(s string, f func(rune) bool) string {
+	withoutHead := TrimLeftFunc(s, f)
+	return TrimRightFunc(withoutHead, f)
+}
+
+// IndexFunc returns the index into s of the first Unicode
+// code point satisfying f(c), or -1 if none do.
+func IndexFunc(s string, f func(rune) bool) int {
+ return indexFunc(s, f, true)
+}
+
+// LastIndexFunc returns the index into s of the last
+// Unicode code point satisfying f(c), or -1 if none do.
+func LastIndexFunc(s string, f func(rune) bool) int {
+ return lastIndexFunc(s, f, true)
+}
+
+// indexFunc returns the byte index of the first code point c in s for
+// which f(c) == truth, or -1 if there is none. With truth == true it
+// behaves like IndexFunc; with truth == false the predicate is inverted.
+func indexFunc(s string, f func(rune) bool, truth bool) int {
+	for pos, r := range s {
+		if f(r) != truth {
+			continue
+		}
+		return pos
+	}
+	return -1
+}
+
+// lastIndexFunc returns the byte index of the last code point c in s for
+// which f(c) == truth, or -1 if there is none. With truth == true it
+// behaves like LastIndexFunc; with truth == false the predicate is inverted.
+func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
+	end := len(s)
+	for end > 0 {
+		// Decode backwards: after the subtraction, end is the start
+		// index of the code point just examined.
+		r, width := utf8.DecodeLastRuneInString(s[:end])
+		end -= width
+		if f(r) == truth {
+			return end
+		}
+	}
+	return -1
+}
+
+// asciiSet is a 256-bit (32-byte) bitmap used as a set of ASCII characters.
+// Bit c%32 of word c/32 stands for byte value c, so the 128 bits of the
+// lower four words cover all of ASCII. The upper four words are never set
+// by makeASCIISet, which makes every non-ASCII byte report as absent.
+// They are kept, although unused, so that asciiSet.contains can index with
+// any byte value without a bounds check.
+type asciiSet [8]uint32
+
+// makeASCIISet builds the set of the characters in chars. ok is false if
+// chars contains a non-ASCII byte; the returned set is then incomplete.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+	for i := 0; i < len(chars); i++ {
+		ch := chars[i]
+		if ch >= utf8.RuneSelf {
+			return as, false
+		}
+		as[ch>>5] |= uint32(1) << (ch & 31)
+	}
+	return as, true
+}
+
+// contains reports whether c is a member of the set.
+func (as *asciiSet) contains(c byte) bool {
+	word := as[c>>5]
+	bit := uint32(1) << (c & 31)
+	return word&bit != 0
+}
+
+// Trim returns the part of s that remains after dropping every leading and
+// trailing Unicode code point that occurs in cutset.
+func Trim(s, cutset string) string {
+	switch {
+	case len(s) == 0 || len(cutset) == 0:
+		return s
+	case len(cutset) == 1 && cutset[0] < utf8.RuneSelf:
+		// A single ASCII character: plain byte comparison suffices.
+		c := cutset[0]
+		return trimLeftByte(trimRightByte(s, c), c)
+	}
+	// An all-ASCII cutset can be tested with a bitmap lookup per byte.
+	if set, ok := makeASCIISet(cutset); ok {
+		return trimLeftASCII(trimRightASCII(s, &set), &set)
+	}
+	return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
+}
+
+// TrimLeft returns the part of s that remains after dropping every leading
+// Unicode code point that occurs in cutset.
+//
+// To remove a prefix, use TrimPrefix instead.
+func TrimLeft(s, cutset string) string {
+	switch {
+	case len(s) == 0 || len(cutset) == 0:
+		return s
+	case len(cutset) == 1 && cutset[0] < utf8.RuneSelf:
+		// A single ASCII character: plain byte comparison suffices.
+		return trimLeftByte(s, cutset[0])
+	}
+	// An all-ASCII cutset can be tested with a bitmap lookup per byte.
+	if set, ok := makeASCIISet(cutset); ok {
+		return trimLeftASCII(s, &set)
+	}
+	return trimLeftUnicode(s, cutset)
+}
+
+// trimLeftByte returns s without its leading run of bytes equal to c.
+func trimLeftByte(s string, c byte) string {
+	skip := 0
+	for skip < len(s) && s[skip] == c {
+		skip++
+	}
+	return s[skip:]
+}
+
+// trimLeftASCII returns s without its leading run of bytes that are
+// members of the set as.
+func trimLeftASCII(s string, as *asciiSet) string {
+	skip := 0
+	for skip < len(s) && as.contains(s[skip]) {
+		skip++
+	}
+	return s[skip:]
+}
+
+func trimLeftUnicode(s, cutset string) string {
+ for len(s) > 0 {
+ r, n := rune(s[0]), 1
+ if r >= utf8.RuneSelf {
+ r, n = utf8.DecodeRuneInString(s)
+ }
+ if !ContainsRune(cutset, r) {
+ break
+ }
+ s = s[n:]
+ }
+ return s
+}
+
+// TrimRight returns the part of s that remains after dropping every
+// trailing Unicode code point that occurs in cutset.
+//
+// To remove a suffix, use TrimSuffix instead.
+func TrimRight(s, cutset string) string {
+	switch {
+	case len(s) == 0 || len(cutset) == 0:
+		return s
+	case len(cutset) == 1 && cutset[0] < utf8.RuneSelf:
+		// A single ASCII character: plain byte comparison suffices.
+		return trimRightByte(s, cutset[0])
+	}
+	// An all-ASCII cutset can be tested with a bitmap lookup per byte.
+	if set, ok := makeASCIISet(cutset); ok {
+		return trimRightASCII(s, &set)
+	}
+	return trimRightUnicode(s, cutset)
+}
+
+// trimRightByte returns s without its trailing run of bytes equal to c.
+func trimRightByte(s string, c byte) string {
+	end := len(s)
+	for end > 0 && s[end-1] == c {
+		end--
+	}
+	return s[:end]
+}
+
+// trimRightASCII returns s without its trailing run of bytes that are
+// members of the set as.
+func trimRightASCII(s string, as *asciiSet) string {
+	end := len(s)
+	for end > 0 && as.contains(s[end-1]) {
+		end--
+	}
+	return s[:end]
+}
+
+func trimRightUnicode(s, cutset string) string {
+ for len(s) > 0 {
+ r, n := rune(s[len(s)-1]), 1
+ if r >= utf8.RuneSelf {
+ r, n = utf8.DecodeLastRuneInString(s)
+ }
+ if !ContainsRune(cutset, r) {
+ break
+ }
+ s = s[:len(s)-n]
+ }
+ return s
+}
+
+// TrimSpace returns the part of s that remains after dropping all leading
+// and trailing white space, as defined by Unicode.
+func TrimSpace(s string) string {
+	// ASCII fast path, front: advance to the first non-space byte.
+	lo := 0
+	for lo < len(s) {
+		b := s[lo]
+		if b >= utf8.RuneSelf {
+			// A non-ASCII byte: let the rune-aware code deal with
+			// whatever is left.
+			return TrimFunc(s[lo:], unicode.IsSpace)
+		}
+		if asciiSpace[b] == 0 {
+			break
+		}
+		lo++
+	}
+
+	// ASCII fast path, back: retreat to the last non-space byte.
+	hi := len(s)
+	for hi > lo {
+		b := s[hi-1]
+		if b >= utf8.RuneSelf {
+			// The front is already trimmed, so only the end remains.
+			return TrimRightFunc(s[lo:hi], unicode.IsSpace)
+		}
+		if asciiSpace[b] == 0 {
+			break
+		}
+		hi--
+	}
+
+	// s[lo:hi] now begins and ends with an ASCII non-space byte (or is
+	// empty); every non-ASCII case has returned above.
+	return s[lo:hi]
+}
+
+// TrimPrefix returns s with the leading prefix removed.
+// If s does not start with prefix, s is returned unchanged.
+func TrimPrefix(s, prefix string) string {
+	if !HasPrefix(s, prefix) {
+		return s
+	}
+	return s[len(prefix):]
+}
+
+// TrimSuffix returns s with the trailing suffix removed.
+// If s does not end with suffix, s is returned unchanged.
+func TrimSuffix(s, suffix string) string {
+	if !HasSuffix(s, suffix) {
+		return s
+	}
+	return s[:len(s)-len(suffix)]
+}
+
+// Replace returns a copy of s in which the first n non-overlapping
+// occurrences of old are replaced by new.
+// An empty old matches at the beginning of the string and after each
+// UTF-8 sequence, yielding up to k+1 replacements for a k-rune string.
+// If n < 0, there is no limit on the number of replacements.
+func Replace(s, old, new string, n int) string {
+	if n == 0 || old == new {
+		return s // nothing would change; avoid allocation
+	}
+
+	// Clamp n to the number of occurrences actually present.
+	matches := Count(s, old)
+	if matches == 0 {
+		return s // avoid allocation
+	}
+	if n < 0 || matches < n {
+		n = matches
+	}
+
+	// Build the result: for each replacement copy the text up to the
+	// match, then new, and continue after the match.
+	var out Builder
+	out.Grow(len(s) + n*(len(new)-len(old)))
+	cursor := 0
+	for done := 0; done < n; done++ {
+		hit := cursor
+		switch {
+		case len(old) != 0:
+			hit += Index(s[cursor:], old)
+		case done > 0:
+			// Empty old: after the first match at offset 0, the next
+			// match sits one code point further on.
+			_, width := utf8.DecodeRuneInString(s[cursor:])
+			hit += width
+		}
+		out.WriteString(s[cursor:hit])
+		out.WriteString(new)
+		cursor = hit + len(old)
+	}
+	out.WriteString(s[cursor:])
+	return out.String()
+}
+
+// ReplaceAll returns a copy of the string s with all
+// non-overlapping instances of old replaced by new.
+// If old is empty, it matches at the beginning of the string
+// and after each UTF-8 sequence, yielding up to k+1 replacements
+// for a k-rune string.
+func ReplaceAll(s, old, new string) string {
+ return Replace(s, old, new, -1)
+}
+
+// EqualFold reports whether s and t, interpreted as UTF-8 strings,
+// are equal under simple Unicode case-folding, which is a more general
+// form of case-insensitivity.
+//
+// The comparison starts byte by byte and switches to rune-by-rune
+// comparison at the first position where either string holds a non-ASCII byte.
+func EqualFold(s, t string) bool {
+ // ASCII fast path
+ i := 0
+ for ; i < len(s) && i < len(t); i++ {
+ sr := s[i]
+ tr := t[i]
+ // A set high bit in either byte means non-ASCII input: continue
+ // from index i with the rune-based loop below.
+ if sr|tr >= utf8.RuneSelf {
+ goto hasUnicode
+ }
+
+ // Easy case.
+ if tr == sr {
+ continue
+ }
+
+ // Make sr < tr to simplify what follows.
+ if tr < sr {
+ tr, sr = sr, tr
+ }
+ // ASCII only, sr/tr must be upper/lower case
+ if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
+ continue
+ }
+ return false
+ }
+ // Check if we've exhausted both strings.
+ return len(s) == len(t)
+
+hasUnicode:
+ // Drop the prefix that has already been compared.
+ s = s[i:]
+ t = t[i:]
+ for _, sr := range s {
+ // If t is exhausted the strings are not equal.
+ if len(t) == 0 {
+ return false
+ }
+
+ // Extract first rune from second string.
+ var tr rune
+ if t[0] < utf8.RuneSelf {
+ tr, t = rune(t[0]), t[1:]
+ } else {
+ r, size := utf8.DecodeRuneInString(t)
+ tr, t = r, t[size:]
+ }
+
+ // If they match, keep going; if not, return false.
+
+ // Easy case.
+ if tr == sr {
+ continue
+ }
+
+ // Make sr < tr to simplify what follows.
+ if tr < sr {
+ tr, sr = sr, tr
+ }
+ // Fast check for ASCII.
+ if tr < utf8.RuneSelf {
+ // ASCII only, sr/tr must be upper/lower case
+ if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
+ continue
+ }
+ return false
+ }
+
+ // General case. SimpleFold(x) returns the next equivalent rune > x
+ // or wraps around to smaller values.
+ // Walk sr's fold orbit upwards until it reaches or passes tr, or
+ // wraps back to sr.
+ r := unicode.SimpleFold(sr)
+ for r != sr && r < tr {
+ r = unicode.SimpleFold(r)
+ }
+ if r == tr {
+ continue
+ }
+ return false
+ }
+
+ // First string is empty, so check if the second one is also empty.
+ return len(t) == 0
+}
+
+// Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
+//
+// An empty substr is found at index 0.
+func Index(s, substr string) int {
+ n := len(substr)
+ switch {
+ case n == 0:
+ return 0
+ case n == 1:
+ // A one-byte needle is a plain byte search.
+ return IndexByte(s, substr[0])
+ case n == len(s):
+ // Equal lengths: the only possible match is the whole string.
+ if substr == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ case n <= bytealg.MaxLen:
+ // Use brute force when s and substr both are small
+ if len(s) <= bytealg.MaxBruteForce {
+ return bytealg.IndexString(s, substr)
+ }
+ c0 := substr[0]
+ c1 := substr[1]
+ i := 0
+ // t is one past the last index at which substr could start.
+ t := len(s) - n + 1
+ fails := 0
+ for i < t {
+ if s[i] != c0 {
+ // IndexByte is faster than bytealg.IndexString, so use it as long as
+ // we're not getting lots of false positives.
+ o := IndexByte(s[i+1:t], c0)
+ if o < 0 {
+ return -1
+ }
+ i += o + 1
+ }
+ // s[i] == c0 here; compare the second byte before the full substring.
+ if s[i+1] == c1 && s[i:i+n] == substr {
+ return i
+ }
+ fails++
+ i++
+ // Switch to bytealg.IndexString when IndexByte produces too many false positives.
+ if fails > bytealg.Cutover(i) {
+ r := bytealg.IndexString(s[i:], substr)
+ if r >= 0 {
+ return r + i
+ }
+ return -1
+ }
+ }
+ return -1
+ }
+ // substr is longer than bytealg.MaxLen: same first-byte scan as above,
+ // but the fallback after too many false positives is Rabin-Karp.
+ c0 := substr[0]
+ c1 := substr[1]
+ i := 0
+ // t is one past the last index at which substr could start.
+ t := len(s) - n + 1
+ fails := 0
+ for i < t {
+ if s[i] != c0 {
+ o := IndexByte(s[i+1:t], c0)
+ if o < 0 {
+ return -1
+ }
+ i += o + 1
+ }
+ if s[i+1] == c1 && s[i:i+n] == substr {
+ return i
+ }
+ i++
+ fails++
+ if fails >= 4+i>>4 && i < t {
+ // See comment in ../bytes/bytes.go.
+ j := bytealg.IndexRabinKarp(s[i:], substr)
+ if j < 0 {
+ return -1
+ }
+ return i + j
+ }
+ }
+ return -1
+}
+
+// Cut splits s around the first occurrence of sep and returns the text
+// before and after it.
+// The found result reports whether sep appears in s.
+// If sep does not appear in s, Cut returns s, "", false.
+func Cut(s, sep string) (before, after string, found bool) {
+	i := Index(s, sep)
+	if i < 0 {
+		return s, "", false
+	}
+	return s[:i], s[i+len(sep):], true
+}
+
+// CutPrefix returns s with the leading prefix removed and reports whether
+// the prefix was present.
+// If s doesn't start with prefix, CutPrefix returns s, false.
+// If prefix is the empty string, CutPrefix returns s, true.
+func CutPrefix(s, prefix string) (after string, found bool) {
+	if HasPrefix(s, prefix) {
+		return s[len(prefix):], true
+	}
+	return s, false
+}
+
+// CutSuffix returns s with the trailing suffix removed and reports whether
+// the suffix was present.
+// If s doesn't end with suffix, CutSuffix returns s, false.
+// If suffix is the empty string, CutSuffix returns s, true.
+func CutSuffix(s, suffix string) (before string, found bool) {
+	if HasSuffix(s, suffix) {
+		return s[:len(s)-len(suffix)], true
+	}
+	return s, false
+}
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
new file mode 100644
index 0000000..f93cf68
--- /dev/null
+++ b/src/strings/strings_test.go
@@ -0,0 +1,2053 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "math"
+ "math/rand"
+ "reflect"
+ "strconv"
+ . "strings"
+ "testing"
+ "unicode"
+ "unicode/utf8"
+ "unsafe"
+)
+
+// eq reports whether a and b have the same length and equal elements at
+// every index.
+func eq(a, b []string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i, v := range a {
+		if v != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// Sample inputs shared by the tests in this file.
+var (
+	abcd   = "abcd"
+	faces  = "☺☻☹"
+	commas = "1,2,3,4"
+	dots   = "1....2....3....4"
+)
+
+// IndexTest is one table entry for the index-style tests: calling the
+// function under test with (s, sep) is expected to return out.
+type IndexTest struct {
+	s, sep string
+	out    int
+}
+
+// indexTests lists (s, sep) inputs for Index with the expected result.
+// TestIndexByte reuses the entries whose sep is a single byte.
+var indexTests = []IndexTest{
+ {"", "", 0},
+ {"", "a", -1},
+ {"", "foo", -1},
+ {"fo", "foo", -1},
+ {"foo", "foo", 0},
+ {"oofofoofooo", "f", 2},
+ {"oofofoofooo", "foo", 4},
+ {"barfoobarfoo", "foo", 3},
+ {"foo", "", 0},
+ {"foo", "o", 1},
+ {"abcABCabc", "A", 3},
+ {"jrzm6jjhorimglljrea4w3rlgosts0w2gia17hno2td4qd1jz", "jz", 47},
+ {"ekkuk5oft4eq0ocpacknhwouic1uua46unx12l37nioq9wbpnocqks6", "ks6", 52},
+ {"999f2xmimunbuyew5vrkla9cpwhmxan8o98ec", "98ec", 33},
+ {"9lpt9r98i04k8bz6c6dsrthb96bhi", "96bhi", 24},
+ {"55u558eqfaod2r2gu42xxsu631xf0zobs5840vl", "5840vl", 33},
+ // cases with one byte strings - test special case in Index()
+ {"", "a", -1},
+ {"x", "a", -1},
+ {"x", "x", 0},
+ {"abc", "a", 0},
+ {"abc", "b", 1},
+ {"abc", "c", 2},
+ {"abc", "x", -1},
+ // test special cases in Index() for short strings
+ {"", "ab", -1},
+ {"bc", "ab", -1},
+ {"ab", "ab", 0},
+ {"xab", "ab", 1},
+ {"xab"[:2], "ab", -1},
+ {"", "abc", -1},
+ {"xbc", "abc", -1},
+ {"abc", "abc", 0},
+ {"xabc", "abc", 1},
+ {"xabc"[:3], "abc", -1},
+ {"xabxc", "abc", -1},
+ {"", "abcd", -1},
+ {"xbcd", "abcd", -1},
+ {"abcd", "abcd", 0},
+ {"xabcd", "abcd", 1},
+ {"xyabcd"[:5], "abcd", -1},
+ {"xbcqq", "abcqq", -1},
+ {"abcqq", "abcqq", 0},
+ {"xabcqq", "abcqq", 1},
+ {"xyabcqq"[:6], "abcqq", -1},
+ {"xabxcqq", "abcqq", -1},
+ {"xabcqxq", "abcqq", -1},
+ {"", "01234567", -1},
+ {"32145678", "01234567", -1},
+ {"01234567", "01234567", 0},
+ {"x01234567", "01234567", 1},
+ {"x0123456x01234567", "01234567", 9},
+ {"xx01234567"[:9], "01234567", -1},
+ {"", "0123456789", -1},
+ {"3214567844", "0123456789", -1},
+ {"0123456789", "0123456789", 0},
+ {"x0123456789", "0123456789", 1},
+ {"x012345678x0123456789", "0123456789", 11},
+ {"xyz0123456789"[:12], "0123456789", -1},
+ {"x01234567x89", "0123456789", -1},
+ {"", "0123456789012345", -1},
+ {"3214567889012345", "0123456789012345", -1},
+ {"0123456789012345", "0123456789012345", 0},
+ {"x0123456789012345", "0123456789012345", 1},
+ {"x012345678901234x0123456789012345", "0123456789012345", 17},
+ {"", "01234567890123456789", -1},
+ {"32145678890123456789", "01234567890123456789", -1},
+ {"01234567890123456789", "01234567890123456789", 0},
+ {"x01234567890123456789", "01234567890123456789", 1},
+ {"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
+ {"xyz01234567890123456789"[:22], "01234567890123456789", -1},
+ {"", "0123456789012345678901234567890", -1},
+ {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
+ {"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
+ {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
+ {"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
+ {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
+ {"", "01234567890123456789012345678901", -1},
+ {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
+ {"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
+ {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
+ {"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
+ {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
+ {"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
+ {"", "0123456789012345678901234567890123456789", -1},
+ {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
+ {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
+ {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
+ {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
+ // test fallback to Rabin-Karp.
+ {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
+ {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
+}
+
+// lastIndexTests lists (s, sep) inputs for LastIndex with the expected
+// result; it is run by TestLastIndex.
+var lastIndexTests = []IndexTest{
+ {"", "", 0},
+ {"", "a", -1},
+ {"", "foo", -1},
+ {"fo", "foo", -1},
+ {"foo", "foo", 0},
+ {"foo", "f", 0},
+ {"oofofoofooo", "f", 7},
+ {"oofofoofooo", "foo", 7},
+ {"barfoobarfoo", "foo", 9},
+ {"foo", "", 3},
+ {"foo", "o", 2},
+ {"abcABCabc", "A", 3},
+ {"abcABCabc", "a", 6},
+}
+
+// indexAnyTests lists inputs for IndexAny with the expected result; here
+// the sep field holds the set of characters to look for. It is run by
+// TestIndexAny.
+var indexAnyTests = []IndexTest{
+ {"", "", -1},
+ {"", "a", -1},
+ {"", "abc", -1},
+ {"a", "", -1},
+ {"a", "a", 0},
+ {"\x80", "\xffb", 0},
+ {"aaa", "a", 0},
+ {"abc", "xyz", -1},
+ {"abc", "xcz", 2},
+ {"ab☺c", "x☺yz", 2},
+ {"a☺b☻c☹d", "cx", len("a☺b☻")},
+ {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
+ {"aRegExp*", ".(|)*+?^$[]", 7},
+ {dots + dots + dots, " ", -1},
+ {"012abcba210", "\xffb", 4},
+ {"012\x80bcb\x80210", "\xffb", 3},
+ {"0123456\xcf\x80abc", "\xcfb\x80", 10},
+}
+
+// lastIndexAnyTests lists inputs for LastIndexAny with the expected
+// result; here the sep field holds the set of characters to look for.
+// It is run by TestLastIndexAny.
+var lastIndexAnyTests = []IndexTest{
+ {"", "", -1},
+ {"", "a", -1},
+ {"", "abc", -1},
+ {"a", "", -1},
+ {"a", "a", 0},
+ {"\x80", "\xffb", 0},
+ {"aaa", "a", 2},
+ {"abc", "xyz", -1},
+ {"abc", "ab", 1},
+ {"ab☺c", "x☺yz", 2},
+ {"a☺b☻c☹d", "cx", len("a☺b☻")},
+ {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
+ {"a.RegExp*", ".(|)*+?^$[]", 8},
+ {dots + dots + dots, " ", -1},
+ {"012abcba210", "\xffb", 6},
+ {"012\x80bcb\x80210", "\xffb", 7},
+ {"0123456\xcf\x80abc", "\xcfb\x80", 10},
+}
+
+// runIndexTests calls f on every entry of testCases and reports each
+// result that differs from the entry's expected value. funcName should be
+// the name of f; it only appears in failure messages.
+func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
+	for i := range testCases {
+		tc := &testCases[i]
+		if got := f(tc.s, tc.sep); got != tc.out {
+			t.Errorf("%s(%q,%q) = %v; want %v", funcName, tc.s, tc.sep, got, tc.out)
+		}
+	}
+}
+
+// TestIndex runs the indexTests table against Index.
+func TestIndex(t *testing.T) {
+	runIndexTests(t, Index, "Index", indexTests)
+}
+
+// TestLastIndex runs the lastIndexTests table against LastIndex.
+func TestLastIndex(t *testing.T) {
+	runIndexTests(t, LastIndex, "LastIndex", lastIndexTests)
+}
+
+// TestIndexAny runs the indexAnyTests table against IndexAny.
+func TestIndexAny(t *testing.T) {
+	runIndexTests(t, IndexAny, "IndexAny", indexAnyTests)
+}
+
+// TestLastIndexAny runs the lastIndexAnyTests table against LastIndexAny.
+func TestLastIndexAny(t *testing.T) {
+	runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests)
+}
+
+// TestIndexByte checks IndexByte against every indexTests entry whose
+// separator is exactly one byte long.
+func TestIndexByte(t *testing.T) {
+	for _, tc := range indexTests {
+		if len(tc.sep) == 1 {
+			c := tc.sep[0]
+			if got := IndexByte(tc.s, c); got != tc.out {
+				t.Errorf(`IndexByte(%q, %q) = %v; want %v`, tc.s, c, got, tc.out)
+			}
+		}
+	}
+}
+
+// TestLastIndexByte checks LastIndexByte on a small table; only the first
+// byte of each sep is searched for.
+func TestLastIndexByte(t *testing.T) {
+	cases := []IndexTest{
+		{"", "q", -1},
+		{"abcdef", "q", -1},
+		{"abcdefabcdef", "a", len("abcdef")},      // something in the middle
+		{"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
+		{"zabcdefabcdef", "z", 0},                 // first byte
+		{"a☺b☻c☹d", "b", len("a☺")},               // non-ascii
+	}
+	for i := range cases {
+		tc := &cases[i]
+		c := tc.sep[0]
+		got := LastIndexByte(tc.s, c)
+		if got == tc.out {
+			continue
+		}
+		t.Errorf("LastIndexByte(%q,%c) = %v; want %v", tc.s, c, got, tc.out)
+	}
+}
+
+// simpleIndex is a straightforward reference implementation of Index: it
+// tries every start position in turn and returns the first at which sep
+// occurs in s, or -1 if there is none.
+func simpleIndex(s, sep string) int {
+	for start := 0; start+len(sep) <= len(s); start++ {
+		if s[start:start+len(sep)] == sep {
+			return start
+		}
+	}
+	return -1
+}
+
+// TestIndexRandom compares Index with simpleIndex on random haystacks of
+// various lengths. Separators are random substrings of the haystack;
+// every fourth one gets an "A" inserted, a character that never occurs in
+// the haystack alphabet.
+func TestIndexRandom(t *testing.T) {
+	const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
+	for round := 0; round < 10; round++ {
+		for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary
+			raw := make([]byte, strLen)
+			for k := range raw {
+				raw[k] = chars[rand.Intn(len(chars))]
+			}
+			s := string(raw)
+			for trial := 0; trial < 50; trial++ {
+				begin := rand.Intn(len(s) + 1)
+				end := begin + rand.Intn(len(s)+1-begin)
+				sep := s[begin:end]
+				if trial%4 == 0 {
+					pos := rand.Intn(len(sep) + 1)
+					sep = sep[:pos] + "A" + sep[pos:]
+				}
+				want := simpleIndex(s, sep)
+				if res := Index(s, sep); res != want {
+					t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want)
+				}
+			}
+		}
+	}
+}
+
+// TestIndexRune checks IndexRune against a table of valid, invalid and
+// out-of-range runes, then verifies that two representative lookups do not
+// allocate.
+func TestIndexRune(t *testing.T) {
+	cases := []struct {
+		s    string
+		r    rune
+		want int
+	}{
+		{"", 'a', -1},
+		{"", '☺', -1},
+		{"foo", '☹', -1},
+		{"foo", 'o', 1},
+		{"foo☺bar", '☺', 3},
+		{"foo☺☻☹bar", '☹', 9},
+		{"a A x", 'A', 2},
+		{"some_text=some_value", '=', 9},
+		{"☺a", 'a', 3},
+		{"a☻☺b", '☺', 4},
+
+		// RuneError should match any invalid UTF-8 byte sequence.
+		{"�", '�', 0},
+		{"\xff", '�', 0},
+		{"☻x�", '�', len("☻x")},
+		{"☻x\xe2\x98", '�', len("☻x")},
+		{"☻x\xe2\x98�", '�', len("☻x")},
+		{"☻x\xe2\x98x", '�', len("☻x")},
+
+		// Invalid rune values should never match.
+		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
+		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // surrogate code point
+		{"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+	}
+	for _, c := range cases {
+		got := IndexRune(c.s, c.r)
+		if got == c.want {
+			continue
+		}
+		t.Errorf("IndexRune(%q, %d) = %v; want %v", c.s, c.r, got, c.want)
+	}
+
+	// One single-byte and one multi-byte lookup, measured for allocations.
+	haystack := "test世界"
+	perRun := testing.AllocsPerRun(1000, func() {
+		if at := IndexRune(haystack, 's'); at != 2 {
+			t.Fatalf("'s' at %d; want 2", at)
+		}
+		if at := IndexRune(haystack, '世'); at != 4 {
+			t.Fatalf("'世' at %d; want 4", at)
+		}
+	})
+	// The allocation check is skipped whenever a coverage mode is set.
+	if testing.CoverMode() == "" && perRun != 0 {
+		t.Errorf("expected no allocations, got %f", perRun)
+	}
+}
+
+// benchmarkString is the haystack shared by the Index* benchmarks; it is
+// ASCII apart from a single multi-byte rune ('☺').
+const benchmarkString = "some_text=some☺value"
+
+func BenchmarkIndexRune(b *testing.B) {
+ if got := IndexRune(benchmarkString, '☺'); got != 14 {
+ b.Fatalf("wrong index: expected 14, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ IndexRune(benchmarkString, '☺')
+ }
+}
+
+// benchmarkLongString is benchmarkString preceded by 100 spaces.
+var benchmarkLongString = Repeat(" ", 100) + benchmarkString
+
+func BenchmarkIndexRuneLongString(b *testing.B) {
+ if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
+ b.Fatalf("wrong index: expected 114, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ IndexRune(benchmarkLongString, '☺')
+ }
+}
+
+func BenchmarkIndexRuneFastPath(b *testing.B) {
+ if got := IndexRune(benchmarkString, 'v'); got != 17 {
+ b.Fatalf("wrong index: expected 17, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ IndexRune(benchmarkString, 'v')
+ }
+}
+
+func BenchmarkIndex(b *testing.B) {
+ if got := Index(benchmarkString, "v"); got != 17 {
+ b.Fatalf("wrong index: expected 17, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ Index(benchmarkString, "v")
+ }
+}
+
+// BenchmarkLastIndex measures LastIndex for the one-byte needle "v" in
+// benchmarkString. The up-front sanity check calls LastIndex itself, so a
+// broken LastIndex is reported instead of being silently benchmarked.
+func BenchmarkLastIndex(b *testing.B) {
+	// "v" occurs exactly once in benchmarkString, at byte offset 17.
+	if got := LastIndex(benchmarkString, "v"); got != 17 {
+		b.Fatalf("wrong index: expected 17, got=%d", got)
+	}
+	for i := 0; i < b.N; i++ {
+		LastIndex(benchmarkString, "v")
+	}
+}
+
+func BenchmarkIndexByte(b *testing.B) {
+ if got := IndexByte(benchmarkString, 'v'); got != 17 {
+ b.Fatalf("wrong index: expected 17, got=%d", got)
+ }
+ for i := 0; i < b.N; i++ {
+ IndexByte(benchmarkString, 'v')
+ }
+}
+
+// SplitTest is one table entry for the SplitN and SplitAfterN tests: the
+// input s, the separator sep, the count n, and the expected pieces a.
+type SplitTest struct {
+ s string
+ sep string
+ n int
+ a []string
+}
+
+// splittests is the table consumed by TestSplit; each entry gives the
+// arguments to SplitN and the pieces it is expected to return.
+var splittests = []SplitTest{
+ {"", "", -1, []string{}},
+ {abcd, "", 2, []string{"a", "bcd"}},
+ {abcd, "", 4, []string{"a", "b", "c", "d"}},
+ {abcd, "", -1, []string{"a", "b", "c", "d"}},
+ {faces, "", -1, []string{"☺", "☻", "☹"}},
+ {faces, "", 3, []string{"☺", "☻", "☹"}},
+ {faces, "", 17, []string{"☺", "☻", "☹"}},
+ {"☺�☹", "", -1, []string{"☺", "�", "☹"}},
+ {abcd, "a", 0, nil},
+ {abcd, "a", -1, []string{"", "bcd"}},
+ {abcd, "z", -1, []string{"abcd"}},
+ {commas, ",", -1, []string{"1", "2", "3", "4"}},
+ {dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
+ {faces, "☹", -1, []string{"☺☻", ""}},
+ {faces, "~", -1, []string{faces}},
+ {"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
+ {"1 2", " ", 3, []string{"1", "2"}},
+ {"", "T", math.MaxInt / 4, []string{""}},
+ {"\xff-\xff", "", -1, []string{"\xff", "-", "\xff"}},
+ {"\xff-\xff", "-", -1, []string{"\xff", "\xff"}},
+}
+
+// TestSplit runs SplitN over splittests, checks that joining the pieces with
+// the separator restores the input, and for negative n checks that Split
+// returns the same pieces.
+func TestSplit(t *testing.T) {
+	for _, tc := range splittests {
+		pieces := SplitN(tc.s, tc.sep, tc.n)
+		if !eq(pieces, tc.a) {
+			t.Errorf("Split(%q, %q, %d) = %v; want %v", tc.s, tc.sep, tc.n, pieces, tc.a)
+			continue
+		}
+		// The round-trip checks below are skipped for n == 0.
+		if tc.n == 0 {
+			continue
+		}
+		if joined := Join(pieces, tc.sep); joined != tc.s {
+			t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tc.s, tc.sep, tc.n, tc.sep, joined)
+		}
+		if tc.n >= 0 {
+			continue
+		}
+		if all := Split(tc.s, tc.sep); !reflect.DeepEqual(pieces, all) {
+			t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tc.s, tc.sep, tc.n, all, pieces)
+		}
+	}
+}
+
+// splitaftertests is the table consumed by TestSplitAfter; each entry gives
+// the arguments to SplitAfterN and the pieces it is expected to return.
+var splitaftertests = []SplitTest{
+ {abcd, "a", -1, []string{"a", "bcd"}},
+ {abcd, "z", -1, []string{"abcd"}},
+ {abcd, "", -1, []string{"a", "b", "c", "d"}},
+ {commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
+ {dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
+ {faces, "☹", -1, []string{"☺☻☹", ""}},
+ {faces, "~", -1, []string{faces}},
+ {faces, "", -1, []string{"☺", "☻", "☹"}},
+ {"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
+ {"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
+ {"1 2", " ", 3, []string{"1 ", "2"}},
+ {"123", "", 2, []string{"1", "23"}},
+ {"123", "", 17, []string{"1", "2", "3"}},
+}
+
+// TestSplitAfter runs SplitAfterN over splitaftertests, checks that
+// concatenating the pieces restores the input, and for negative n checks
+// that SplitAfter returns the same pieces.
+//
+// Failure messages name the function actually under test (SplitAfterN) and
+// the separator actually passed to Join (the empty string).
+func TestSplitAfter(t *testing.T) {
+	for _, tt := range splitaftertests {
+		a := SplitAfterN(tt.s, tt.sep, tt.n)
+		if !eq(a, tt.a) {
+			t.Errorf(`SplitAfterN(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
+			continue
+		}
+		// Each piece keeps its trailing separator, so the pieces are joined
+		// with "" rather than with tt.sep.
+		s := Join(a, "")
+		if s != tt.s {
+			t.Errorf(`Join(SplitAfterN(%q, %q, %d), "") = %q; want %q`, tt.s, tt.sep, tt.n, s, tt.s)
+		}
+		if tt.n < 0 {
+			b := SplitAfter(tt.s, tt.sep)
+			if !reflect.DeepEqual(a, b) {
+				t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
+			}
+		}
+	}
+}
+
+// FieldsTest is one table entry for the Fields and FieldsFunc tests: the
+// input s and the expected fields a.
+type FieldsTest struct {
+ s string
+ a []string
+}
+
+// fieldstests lists inputs and the fields expected when splitting on white
+// space; it is used with Fields and with FieldsFunc(unicode.IsSpace).
+var fieldstests = []FieldsTest{
+ {"", []string{}},
+ {" ", []string{}},
+ {" \t ", []string{}},
+ {"\u2000", []string{}},
+ {" abc ", []string{"abc"}},
+ {"1 2 3 4", []string{"1", "2", "3", "4"}},
+ {"1 2 3 4", []string{"1", "2", "3", "4"}},
+ {"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
+ {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
+ {"\u2000\u2001\u2002", []string{}},
+ {"\n™\t™\n", []string{"™", "™"}},
+ {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
+ {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
+ {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
+ {faces, []string{faces}},
+}
+
+// TestFields checks Fields against every entry of fieldstests.
+func TestFields(t *testing.T) {
+	for _, tc := range fieldstests {
+		if got := Fields(tc.s); !eq(got, tc.a) {
+			t.Errorf("Fields(%q) = %v; want %v", tc.s, got, tc.a)
+		}
+	}
+}
+
+// FieldsFuncTests lists inputs and the fields expected from FieldsFunc when
+// the separator predicate matches the rune 'X' (see TestFieldsFunc).
+var FieldsFuncTests = []FieldsTest{
+ {"", []string{}},
+ {"XX", []string{}},
+ {"XXhiXXX", []string{"hi"}},
+ {"aXXbXXXcX", []string{"a", "b", "c"}},
+}
+
+// TestFieldsFunc checks FieldsFunc twice: with unicode.IsSpace against
+// fieldstests, and with an "is 'X'" predicate against FieldsFuncTests.
+func TestFieldsFunc(t *testing.T) {
+	for _, tc := range fieldstests {
+		if got := FieldsFunc(tc.s, unicode.IsSpace); !eq(got, tc.a) {
+			t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tc.s, got, tc.a)
+		}
+	}
+
+	isX := func(r rune) bool {
+		return r == 'X'
+	}
+	for _, tc := range FieldsFuncTests {
+		got := FieldsFunc(tc.s, isX)
+		if eq(got, tc.a) {
+			continue
+		}
+		t.Errorf("FieldsFunc(%q) = %v, want %v", tc.s, got, tc.a)
+	}
+}
+
+// StringTest is a test case for any function which accepts and returns a
+// single string: in is the argument and out is the expected result.
+type StringTest struct {
+ in, out string
+}
+
+// runStringTests applies f to the input of every test case and reports a
+// test error for each result that differs from the expected output.
+// funcName should be the name of f; it only appears in failure reports.
+func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
+	for i := range testCases {
+		in, want := testCases[i].in, testCases[i].out
+		if got := f(in); got != want {
+			t.Errorf("%s(%q) = %q; want %q", funcName, in, got, want)
+		}
+	}
+}
+
+// upperTests lists inputs and expected results for ToUpper; it is used by
+// TestToUpper and BenchmarkToUpper.
+var upperTests = []StringTest{
+ {"", ""},
+ {"ONLYUPPER", "ONLYUPPER"},
+ {"abc", "ABC"},
+ {"AbC123", "ABC123"},
+ {"azAZ09_", "AZAZ09_"},
+ {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
+ {"RENAN BASTOS 93 AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO", "RENAN BASTOS 93 AOSDAJDJAIDJAIDAJIAIDSJJAIDIJADSJIADJIOOKKO"},
+ {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
+ {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
+ {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
+}
+
+// lowerTests lists inputs and expected results for ToLower; it is used by
+// TestToLower and BenchmarkToLower.
+var lowerTests = []StringTest{
+ {"", ""},
+ {"abc", "abc"},
+ {"AbC123", "abc123"},
+ {"azAZ09_", "azaz09_"},
+ {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
+ {"renan bastos 93 AOSDAJDJAIDJAIDAJIaidsjjaidijadsjiadjiOOKKO", "renan bastos 93 aosdajdjaidjaidajiaidsjjaidijadsjiadjiookko"},
+ {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
+ {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
+ {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
+}
+
+// space is a run of ASCII and non-ASCII space characters used to build
+// inputs for the trimming tests.
+const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
+
+// trimSpaceTests lists inputs and expected results for TrimSpace, including
+// inputs that contain invalid UTF-8 bytes.
+var trimSpaceTests = []StringTest{
+ {"", ""},
+ {"abc", "abc"},
+ {space + "abc" + space, "abc"},
+ {" ", ""},
+ {" \t\r\n \t\t\r\r\n\n ", ""},
+ {" \t\r\n x\t\t\r\r\n\n ", "x"},
+ {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
+ {"1 \t\r\n2", "1 \t\r\n2"},
+ {" x\x80", "x\x80"},
+ {" x\xc0", "x\xc0"},
+ {"x \xc0\xc0 ", "x \xc0\xc0"},
+ {"x \xc0", "x \xc0"},
+ {"x \xc0 ", "x \xc0"},
+ {"x \xc0\xc0 ", "x \xc0\xc0"},
+ {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
+ {"x ☺ ", "x ☺"},
+}
+
+func tenRunes(ch rune) string {
+ r := make([]rune, 10)
+ for i := range r {
+ r[i] = ch
+ }
+ return string(r)
+}
+
+// User-defined self-inverse mapping function
+func rot13(r rune) rune {
+ step := rune(13)
+ if r >= 'a' && r <= 'z' {
+ return ((r - 'a' + step) % 26) + 'a'
+ }
+ if r >= 'A' && r <= 'Z' {
+ return ((r - 'A' + step) % 26) + 'A'
+ }
+ return r
+}
+
+// TestMap exercises Map with mapping functions that grow, shrink, rotate,
+// drop and replace runes, and checks that an identity mapping hands back the
+// original string data without copying it.
+//
+// Each numbered step reports failures under its own label so that a failing
+// step can be identified from the message alone.
+func TestMap(t *testing.T) {
+	// Run a couple of awful growth/shrinkage tests
+	a := tenRunes('a')
+	// 1. Grow: every 'a' is replaced by unicode.MaxRune.
+	maxRune := func(rune) rune { return unicode.MaxRune }
+	m := Map(maxRune, a)
+	expect := tenRunes(unicode.MaxRune)
+	if m != expect {
+		t.Errorf("growing: expected %q got %q", expect, m)
+	}
+
+	// 2. Shrink: every unicode.MaxRune is replaced by 'a'.
+	minRune := func(rune) rune { return 'a' }
+	m = Map(minRune, tenRunes(unicode.MaxRune))
+	expect = a
+	if m != expect {
+		t.Errorf("shrinking: expected %q got %q", expect, m)
+	}
+
+	// 3. Rot13
+	m = Map(rot13, "a to zed")
+	expect = "n gb mrq"
+	if m != expect {
+		t.Errorf("rot13: expected %q got %q", expect, m)
+	}
+
+	// 4. Rot13^2: applying rot13 twice must give back the input.
+	m = Map(rot13, Map(rot13, "a to zed"))
+	expect = "a to zed"
+	if m != expect {
+		t.Errorf("rot13^2: expected %q got %q", expect, m)
+	}
+
+	// 5. Drop: a negative result from the mapping removes the rune.
+	dropNotLatin := func(r rune) rune {
+		if unicode.Is(unicode.Latin, r) {
+			return r
+		}
+		return -1
+	}
+	m = Map(dropNotLatin, "Hello, 세계")
+	expect = "Hello"
+	if m != expect {
+		t.Errorf("drop: expected %q got %q", expect, m)
+	}
+
+	// 6. Identity: the result must share its data pointer with the input.
+	identity := func(r rune) rune {
+		return r
+	}
+	orig := "Input string that we expect not to be copied."
+	m = Map(identity, orig)
+	if unsafe.StringData(orig) != unsafe.StringData(m) {
+		t.Error("unexpected copy during identity map")
+	}
+
+	// 7. Handle invalid UTF-8 sequence
+	replaceNotLatin := func(r rune) rune {
+		if unicode.Is(unicode.Latin, r) {
+			return r
+		}
+		return utf8.RuneError
+	}
+	m = Map(replaceNotLatin, "Hello\255World")
+	expect = "Hello\uFFFDWorld"
+	if m != expect {
+		t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
+	}
+
+	// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
+	encode := func(r rune) rune {
+		switch r {
+		case utf8.RuneSelf:
+			return unicode.MaxRune
+		case unicode.MaxRune:
+			return utf8.RuneSelf
+		}
+		return r
+	}
+	s := string(rune(utf8.RuneSelf)) + string(utf8.MaxRune)
+	r := string(utf8.MaxRune) + string(rune(utf8.RuneSelf)) // reverse of s
+	m = Map(encode, s)
+	if m != r {
+		t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
+	}
+	m = Map(encode, r)
+	if m != s {
+		t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
+	}
+
+	// 9. Check mapping occurs in the front, middle and back
+	trimSpaces := func(r rune) rune {
+		if unicode.IsSpace(r) {
+			return -1
+		}
+		return r
+	}
+	m = Map(trimSpaces, " abc 123 ")
+	expect = "abc123"
+	if m != expect {
+		t.Errorf("trimSpaces: expected %q got %q", expect, m)
+	}
+}
+
+// TestToUpper checks ToUpper against upperTests.
+func TestToUpper(t *testing.T) {
+	runStringTests(t, ToUpper, "ToUpper", upperTests)
+}
+
+// TestToLower checks ToLower against lowerTests.
+func TestToLower(t *testing.T) {
+	runStringTests(t, ToLower, "ToLower", lowerTests)
+}
+
+// toValidUTF8Tests lists, for ToValidUTF8, the input string in, the
+// replacement repl, and the expected output out.
+var toValidUTF8Tests = []struct {
+ in string
+ repl string
+ out string
+}{
+ {"", "\uFFFD", ""},
+ {"abc", "\uFFFD", "abc"},
+ {"\uFDDD", "\uFFFD", "\uFDDD"},
+ {"a\xffb", "\uFFFD", "a\uFFFDb"},
+ {"a\xffb\uFFFD", "X", "aXb\uFFFD"},
+ {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
+ {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"},
+ {"\xC0\xAF", "\uFFFD", "\uFFFD"},
+ {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
+ {"\xed\xa0\x80", "abc", "abc"},
+ {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
+ {"\xF0\x80\x80\xaf", "☺", "☺"},
+ {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+ {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
+}
+
+// TestToValidUTF8 checks ToValidUTF8 against every entry of toValidUTF8Tests.
+func TestToValidUTF8(t *testing.T) {
+	for i := range toValidUTF8Tests {
+		c := toValidUTF8Tests[i]
+		if res := ToValidUTF8(c.in, c.repl); res != c.out {
+			t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", c.in, c.repl, res, c.out)
+		}
+	}
+}
+
+// BenchmarkToUpper runs one sub-benchmark per upperTests entry, named after
+// the input, and verifies the result on every iteration.
+func BenchmarkToUpper(b *testing.B) {
+	for idx := range upperTests {
+		in, want := upperTests[idx].in, upperTests[idx].out
+		b.Run(in, func(b *testing.B) {
+			for left := b.N; left > 0; left-- {
+				if got := ToUpper(in); got != want {
+					b.Errorf("ToUpper(%q) = %q; want %q", in, got, want)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkToLower runs one sub-benchmark per lowerTests entry, named after
+// the input, and verifies the result on every iteration.
+func BenchmarkToLower(b *testing.B) {
+	for idx := range lowerTests {
+		in, want := lowerTests[idx].in, lowerTests[idx].out
+		b.Run(in, func(b *testing.B) {
+			for left := b.N; left > 0; left-- {
+				if got := ToLower(in); got != want {
+					b.Errorf("ToLower(%q) = %q; want %q", in, got, want)
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkMapNoChanges measures Map with a mapping function that returns
+// every rune unchanged.
+func BenchmarkMapNoChanges(b *testing.B) {
+	same := func(r rune) rune { return r }
+	for left := b.N; left > 0; left-- {
+		Map(same, "Some string that won't be modified.")
+	}
+}
+
+func TestSpecialCase(t *testing.T) {
+ lower := "abcçdefgğhıijklmnoöprsştuüvyz"
+ upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
+ u := ToUpperSpecial(unicode.TurkishCase, upper)
+ if u != upper {
+ t.Errorf("Upper(upper) is %s not %s", u, upper)
+ }
+ u = ToUpperSpecial(unicode.TurkishCase, lower)
+ if u != upper {
+ t.Errorf("Upper(lower) is %s not %s", u, upper)
+ }
+ l := ToLowerSpecial(unicode.TurkishCase, lower)
+ if l != lower {
+ t.Errorf("Lower(lower) is %s not %s", l, lower)
+ }
+ l = ToLowerSpecial(unicode.TurkishCase, upper)
+ if l != lower {
+ t.Errorf("Lower(upper) is %s not %s", l, lower)
+ }
+}
+
+// TestTrimSpace checks TrimSpace against trimSpaceTests.
+func TestTrimSpace(t *testing.T) {
+	runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests)
+}
+
+// trimTests lists cases for the two-argument trim functions: f names the
+// function to call (Trim, TrimLeft, TrimRight, TrimPrefix or TrimSuffix),
+// in and arg are its arguments, and out is the expected result.
+var trimTests = []struct {
+ f string
+ in, arg, out string
+}{
+ {"Trim", "abba", "a", "bb"},
+ {"Trim", "abba", "ab", ""},
+ {"TrimLeft", "abba", "ab", ""},
+ {"TrimRight", "abba", "ab", ""},
+ {"TrimLeft", "abba", "a", "bba"},
+ {"TrimLeft", "abba", "b", "abba"},
+ {"TrimRight", "abba", "a", "abb"},
+ {"TrimRight", "abba", "b", "abba"},
+ {"Trim", "<tag>", "<>", "tag"},
+ {"Trim", "* listitem", " *", "listitem"},
+ {"Trim", `"quote"`, `"`, "quote"},
+ {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+ {"Trim", "\x80test\xff", "\xff", "test"},
+ {"Trim", " Ġ ", " ", "Ġ"},
+ {"Trim", " Ġİ0", "0 ", "Ġİ"},
+ //empty string tests
+ {"Trim", "abba", "", "abba"},
+ {"Trim", "", "123", ""},
+ {"Trim", "", "", ""},
+ {"TrimLeft", "abba", "", "abba"},
+ {"TrimLeft", "", "123", ""},
+ {"TrimLeft", "", "", ""},
+ {"TrimRight", "abba", "", "abba"},
+ {"TrimRight", "", "123", ""},
+ {"TrimRight", "", "", ""},
+ {"TrimRight", "☺\xc0", "☺", "☺\xc0"},
+ {"TrimPrefix", "aabb", "a", "abb"},
+ {"TrimPrefix", "aabb", "b", "aabb"},
+ {"TrimSuffix", "aabb", "a", "aabb"},
+ {"TrimSuffix", "aabb", "b", "aab"},
+}
+
+// TestTrim runs every entry of trimTests through the trim function named by
+// the entry and compares the result with the expected output.
+//
+// An entry naming an unknown function is reported and then skipped; calling
+// the unset function value would panic and abort the remaining cases.
+func TestTrim(t *testing.T) {
+	for _, tc := range trimTests {
+		name := tc.f
+		var f func(string, string) string
+		switch name {
+		case "Trim":
+			f = Trim
+		case "TrimLeft":
+			f = TrimLeft
+		case "TrimRight":
+			f = TrimRight
+		case "TrimPrefix":
+			f = TrimPrefix
+		case "TrimSuffix":
+			f = TrimSuffix
+		default:
+			t.Errorf("Undefined trim function %s", name)
+			continue // f is nil here; do not call it
+		}
+		actual := f(tc.in, tc.arg)
+		if actual != tc.out {
+			t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
+		}
+	}
+}
+
+// BenchmarkTrim measures one pass over all trimTests entries per iteration,
+// with allocation reporting enabled, and verifies every result.
+//
+// An entry naming an unknown function is reported and then skipped; calling
+// the unset function value would panic.
+func BenchmarkTrim(b *testing.B) {
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		for _, tc := range trimTests {
+			name := tc.f
+			var f func(string, string) string
+			switch name {
+			case "Trim":
+				f = Trim
+			case "TrimLeft":
+				f = TrimLeft
+			case "TrimRight":
+				f = TrimRight
+			case "TrimPrefix":
+				f = TrimPrefix
+			case "TrimSuffix":
+				f = TrimSuffix
+			default:
+				b.Errorf("Undefined trim function %s", name)
+				continue // f is nil here; do not call it
+			}
+			actual := f(tc.in, tc.arg)
+			if actual != tc.out {
+				b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
+			}
+		}
+	}
+}
+
+func BenchmarkToValidUTF8(b *testing.B) {
+ tests := []struct {
+ name string
+ input string
+ }{
+ {"Valid", "typical"},
+ {"InvalidASCII", "foo\xffbar"},
+ {"InvalidNonASCII", "日本語\xff日本語"},
+ }
+ replacement := "\uFFFD"
+ b.ResetTimer()
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ToValidUTF8(test.input, replacement)
+ }
+ })
+ }
+}
+
+// predicate pairs a rune predicate f with a name that the tests print in
+// failure messages.
+type predicate struct {
+ f func(rune) bool
+ name string
+}
+
+// Named predicates used by the TrimFunc and IndexFunc test tables.
+var isSpace = predicate{unicode.IsSpace, "IsSpace"}
+var isDigit = predicate{unicode.IsDigit, "IsDigit"}
+var isUpper = predicate{unicode.IsUpper, "IsUpper"}
+
+// isValidRune matches every rune except utf8.RuneError.
+var isValidRune = predicate{
+ func(r rune) bool {
+ return r != utf8.RuneError
+ },
+ "IsValidRune",
+}
+
+// not returns a predicate whose function is the logical negation of p.f and
+// whose name is p.name prefixed with "not ".
+func not(p predicate) predicate {
+	negated := func(r rune) bool {
+		return !p.f(r)
+	}
+	return predicate{f: negated, name: "not " + p.name}
+}
+
+// trimFuncTests lists, for a predicate f and input in, the results expected
+// from TrimFunc (trimOut), TrimLeftFunc (leftOut) and TrimRightFunc
+// (rightOut).
+var trimFuncTests = []struct {
+ f predicate
+ in string
+ trimOut string
+ leftOut string
+ rightOut string
+}{
+ {isSpace, space + " hello " + space,
+ "hello",
+ "hello " + space,
+ space + " hello"},
+ {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51",
+ "hello",
+ "hello34\u0e50\u0e51",
+ "\u0e50\u0e5212hello"},
+ {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
+ "hello",
+ "helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
+ "\u2C6F\u2C6F\u2C6F\u2C6FABCDhello"},
+ {not(isSpace), "hello" + space + "hello",
+ space,
+ space + "hello",
+ "hello" + space},
+ {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo",
+ "\u0e50\u0e521234\u0e50\u0e51",
+ "\u0e50\u0e521234\u0e50\u0e51helo",
+ "hello\u0e50\u0e521234\u0e50\u0e51"},
+ {isValidRune, "ab\xc0a\xc0cd",
+ "\xc0a\xc0",
+ "\xc0a\xc0cd",
+ "ab\xc0a\xc0"},
+ {not(isValidRune), "\xc0a\xc0",
+ "a",
+ "a\xc0",
+ "\xc0a"},
+ {isSpace, "",
+ "",
+ "",
+ ""},
+ {isSpace, " ",
+ "",
+ "",
+ ""},
+}
+
+func TestTrimFunc(t *testing.T) {
+ for _, tc := range trimFuncTests {
+ trimmers := []struct {
+ name string
+ trim func(s string, f func(r rune) bool) string
+ out string
+ }{
+ {"TrimFunc", TrimFunc, tc.trimOut},
+ {"TrimLeftFunc", TrimLeftFunc, tc.leftOut},
+ {"TrimRightFunc", TrimRightFunc, tc.rightOut},
+ }
+ for _, trimmer := range trimmers {
+ actual := trimmer.trim(tc.in, tc.f.f)
+ if actual != trimmer.out {
+ t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out)
+ }
+ }
+ }
+}
+
+// indexFuncTests lists, for an input in and predicate f, the index expected
+// from IndexFunc (first) and from LastIndexFunc (last).
+var indexFuncTests = []struct {
+ in string
+ f predicate
+ first, last int
+}{
+ {"", isValidRune, -1, -1},
+ {"abc", isDigit, -1, -1},
+ {"0123", isDigit, 0, 3},
+ {"a1b", isDigit, 1, 1},
+ {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
+ {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
+ {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
+ {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
+
+ // tests of invalid UTF-8
+ {"\x801", isDigit, 1, 1},
+ {"\x80abc", isDigit, -1, -1},
+ {"\xc0a\xc0", isValidRune, 1, 1},
+ {"\xc0a\xc0", not(isValidRune), 0, 2},
+ {"\xc0☺\xc0", not(isValidRune), 0, 4},
+ {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
+ {"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
+ {"a\xe0\x80cd", not(isValidRune), 1, 2},
+ {"\x80\x80\x80\x80", not(isValidRune), 0, 3},
+}
+
+// TestIndexFunc checks IndexFunc and LastIndexFunc against every entry of
+// indexFuncTests.
+func TestIndexFunc(t *testing.T) {
+	for i := range indexFuncTests {
+		c := indexFuncTests[i]
+		if got := IndexFunc(c.in, c.f.f); got != c.first {
+			t.Errorf("IndexFunc(%q, %s) = %d; want %d", c.in, c.f.name, got, c.first)
+		}
+		if got := LastIndexFunc(c.in, c.f.f); got != c.last {
+			t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", c.in, c.f.name, got, c.last)
+		}
+	}
+}
+
+// equal reports whether s1 and s2 are the same string. When they differ it
+// walks both strings rune by rune over their common length, reports every
+// position whose runes differ via t.Errorf (labelled with m), and returns
+// false.
+func equal(m string, s1, s2 string, t *testing.T) bool {
+	if s1 == s2 {
+		return true
+	}
+	left, right := Split(s1, ""), Split(s2, "")
+	for i := 0; i < len(left) && i < len(right); i++ {
+		a, _ := utf8.DecodeRuneInString(left[i])
+		b, _ := utf8.DecodeRuneInString(right[i])
+		if a != b {
+			t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, a, b)
+		}
+	}
+	return false
+}
+
+// TestCaseConsistency converts a string holding every rune value (only the
+// first 1000 in short mode) to upper and lower case, then checks that the
+// rune count is preserved and that each conversion is idempotent.
+func TestCaseConsistency(t *testing.T) {
+	// Make a string of all the runes.
+	total := int(unicode.MaxRune + 1)
+	if testing.Short() {
+		total = 1000
+	}
+	all := make([]rune, total)
+	for i := 0; i < total; i++ {
+		all[i] = rune(i)
+	}
+	s := string(all)
+	// Convert the cases.
+	upper, lower := ToUpper(s), ToLower(s)
+
+	// Consistency checks
+	if got := utf8.RuneCountInString(upper); got != total {
+		t.Error("rune count wrong in upper:", got)
+	}
+	if got := utf8.RuneCountInString(lower); got != total {
+		t.Error("rune count wrong in lower:", got)
+	}
+	if ok := equal("ToUpper(upper)", ToUpper(upper), upper, t); !ok {
+		t.Error("ToUpper(upper) consistency fail")
+	}
+	if ok := equal("ToLower(lower)", ToLower(lower), lower, t); !ok {
+		t.Error("ToLower(lower) consistency fail")
+	}
+	/*
+		  The cross checks below fail because the case mappings are not
+		  one-to-one, such as multiple upper case 'I' mapping to 'i'. They are
+		  commented out but kept for interest.
+		  For instance: CAPITAL LETTER I WITH DOT ABOVE:
+			unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
+
+		if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
+			t.Error("ToUpper(lower) consistency fail");
+		}
+		if !equal("ToLower(upper)", ToLower(upper), lower, t) {
+			t.Error("ToLower(upper) consistency fail");
+		}
+	*/
+}
+
+// longString is "a", then 1<<16 zero bytes, then "z".
+var longString = "a" + string(make([]byte, 1<<16)) + "z"
+
+// RepeatTests lists, for Repeat, the input in, the repetition count, and the
+// expected output out.
+var RepeatTests = []struct {
+ in, out string
+ count int
+}{
+ {"", "", 0},
+ {"", "", 1},
+ {"", "", 2},
+ {"-", "", 0},
+ {"-", "-", 1},
+ {"-", "----------", 10},
+ {"abc ", "abc abc abc ", 3},
+ // Tests for results over the chunkLimit
+ {string(rune(0)), string(make([]byte, 1<<16)), 1 << 16},
+ {longString, longString + longString, 2},
+}
+
+// TestRepeat checks Repeat against every entry of RepeatTests, using equal
+// to report rune-level differences.
+func TestRepeat(t *testing.T) {
+	for i := range RepeatTests {
+		c := RepeatTests[i]
+		got := Repeat(c.in, c.count)
+		if equal("Repeat(s)", got, c.out, t) {
+			continue
+		}
+		t.Errorf("Repeat(%v, %d) = %v; want %v", c.in, c.count, got, c.out)
+	}
+}
+
+func repeat(s string, count int) (err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ switch v := r.(type) {
+ case error:
+ err = v
+ default:
+ err = fmt.Errorf("%s", v)
+ }
+ }
+ }()
+
+ Repeat(s, count)
+
+ return
+}
+
+// See Issue golang.org/issue/16237
+func TestRepeatCatchesOverflow(t *testing.T) {
+ tests := [...]struct {
+ s string
+ count int
+ errStr string
+ }{
+ 0: {"--", -2147483647, "negative"},
+ 1: {"", int(^uint(0) >> 1), ""},
+ 2: {"-", 10, ""},
+ 3: {"gopher", 0, ""},
+ 4: {"-", -1, "negative"},
+ 5: {"--", -102, "negative"},
+ 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
+ }
+
+ for i, tt := range tests {
+ err := repeat(tt.s, tt.count)
+ if tt.errStr == "" {
+ if err != nil {
+ t.Errorf("#%d panicked %v", i, err)
+ }
+ continue
+ }
+
+ if err == nil || !Contains(err.Error(), tt.errStr) {
+ t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
+ }
+ }
+}
+
+// runesEqual reports whether a and b have the same length and hold the same
+// runes in the same order.
+func runesEqual(a, b []rune) bool {
+	n := len(a)
+	if n != len(b) {
+		return false
+	}
+	for i := 0; i < n; i++ {
+		if a[i] != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// RunesTests lists strings and the rune slices expected from converting them
+// with []rune. lossy marks entries for which TestRunes does not check that
+// converting the runes back to a string reproduces the input.
+var RunesTests = []struct {
+ in string
+ out []rune
+ lossy bool
+}{
+ {"", []rune{}, false},
+ {" ", []rune{32}, false},
+ {"ABC", []rune{65, 66, 67}, false},
+ {"abc", []rune{97, 98, 99}, false},
+ {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
+ {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
+ {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
+}
+
+// TestRunes checks the string-to-[]rune conversion against RunesTests and,
+// for entries not marked lossy, that converting back restores the input.
+func TestRunes(t *testing.T) {
+	for i := range RunesTests {
+		c := RunesTests[i]
+		rs := []rune(c.in)
+		if !runesEqual(rs, c.out) {
+			t.Errorf("[]rune(%q) = %v; want %v", c.in, rs, c.out)
+			continue
+		}
+		// Reassembly can only be tested if no information was lost.
+		if c.lossy {
+			continue
+		}
+		if back := string(rs); back != c.in {
+			t.Errorf("string([]rune(%q)) = %x; want %x", c.in, back, c.in)
+		}
+	}
+}
+
+// TestReadByte reads several strings byte by byte through a Reader. After
+// each byte it unreads and re-reads it, expecting the same byte, and at the
+// end it expects the collected bytes to equal the input. UnreadByte on a
+// fresh Reader must fail.
+func TestReadByte(t *testing.T) {
+	for _, s := range []string{"", abcd, faces, commas} {
+		rd := NewReader(s)
+		if err := rd.UnreadByte(); err == nil {
+			t.Errorf("Unreading %q at beginning: expected error", s)
+		}
+		var collected bytes.Buffer
+	reading:
+		for {
+			c, err := rd.ReadByte()
+			switch {
+			case err == io.EOF:
+				break reading
+			case err != nil:
+				t.Errorf("Reading %q: %s", s, err)
+				break reading
+			}
+			collected.WriteByte(c)
+			// Push the byte back and expect to read the same one again.
+			if err := rd.UnreadByte(); err != nil {
+				t.Errorf("Unreading %q: %s", s, err)
+				break
+			}
+			again, err := rd.ReadByte()
+			if err != nil {
+				t.Errorf("Reading %q after unreading: %s", s, err)
+				break
+			}
+			if again != c {
+				t.Errorf("Reading %q after unreading: want byte %q, got %q", s, c, again)
+				break
+			}
+		}
+		if got := collected.String(); got != s {
+			t.Errorf("Reader(%q).ReadByte() produced %q", s, got)
+		}
+	}
+}
+
+// TestReadRune reads several strings rune by rune through a Reader. After
+// each rune it unreads and re-reads it, expecting the same rune and size,
+// and at the end it expects the collected runes to equal the input.
+// UnreadRune on a fresh Reader must fail.
+func TestReadRune(t *testing.T) {
+	for _, s := range []string{"", abcd, faces, commas} {
+		rd := NewReader(s)
+		if err := rd.UnreadRune(); err == nil {
+			t.Errorf("Unreading %q at beginning: expected error", s)
+		}
+		collected := ""
+	reading:
+		for {
+			r, size, err := rd.ReadRune()
+			switch {
+			case err == io.EOF:
+				break reading
+			case err != nil:
+				t.Errorf("Reading %q: %s", s, err)
+				break reading
+			}
+			collected += string(r)
+			// Push the rune back and expect to read the same one again.
+			if err := rd.UnreadRune(); err != nil {
+				t.Errorf("Unreading %q: %s", s, err)
+				break
+			}
+			r2, size2, err := rd.ReadRune()
+			if err != nil {
+				t.Errorf("Reading %q after unreading: %s", s, err)
+				break
+			}
+			if r2 != r {
+				t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r2)
+				break
+			}
+			if size2 != size {
+				t.Errorf("Reading %q after unreading: want size %d, got %d", s, size, size2)
+				break
+			}
+		}
+		if collected != s {
+			t.Errorf("Reader(%q).ReadRune() produced %q", s, collected)
+		}
+	}
+}
+
+// UnreadRuneErrorTests lists Reader operations, by name, after which
+// TestUnreadRuneError expects a following UnreadRune call to return an error.
+var UnreadRuneErrorTests = []struct {
+ name string
+ f func(*Reader)
+}{
+ {"Read", func(r *Reader) { r.Read([]byte{0}) }},
+ {"ReadByte", func(r *Reader) { r.ReadByte() }},
+ {"UnreadRune", func(r *Reader) { r.UnreadRune() }},
+ {"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
+ {"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
+}
+
+// TestUnreadRuneError reads one rune from a fresh Reader, applies each
+// operation from UnreadRuneErrorTests, and expects the following UnreadRune
+// call to fail.
+func TestUnreadRuneError(t *testing.T) {
+	for i := range UnreadRuneErrorTests {
+		op := UnreadRuneErrorTests[i]
+		rd := NewReader("0123456789")
+		_, _, err := rd.ReadRune()
+		if err != nil {
+			// should not happen
+			t.Fatal(err)
+		}
+		op.f(rd)
+		if rd.UnreadRune() == nil {
+			t.Errorf("Unreading after %s: expected error", op.name)
+		}
+	}
+}
+
+// ReplaceTests lists, for Replace, the input in, the strings old and new,
+// the replacement limit n, and the expected output out. TestReplace also
+// runs the entries with n == -1 through ReplaceAll.
+var ReplaceTests = []struct {
+ in string
+ old, new string
+ n int
+ out string
+}{
+ {"hello", "l", "L", 0, "hello"},
+ {"hello", "l", "L", -1, "heLLo"},
+ {"hello", "x", "X", -1, "hello"},
+ {"", "x", "X", -1, ""},
+ {"radar", "r", "<r>", -1, "<r>ada<r>"},
+ {"", "", "<>", -1, "<>"},
+ {"banana", "a", "<>", -1, "b<>n<>n<>"},
+ {"banana", "a", "<>", 1, "b<>nana"},
+ {"banana", "a", "<>", 1000, "b<>n<>n<>"},
+ {"banana", "an", "<>", -1, "b<><>a"},
+ {"banana", "ana", "<>", -1, "b<>na"},
+ {"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
+ {"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
+ {"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
+ {"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
+ {"banana", "", "<>", 1, "<>banana"},
+ {"banana", "a", "a", -1, "banana"},
+ {"banana", "a", "a", 1, "banana"},
+ {"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
+}
+
+// TestReplace checks Replace against ReplaceTests and, for entries whose
+// limit is -1, checks that ReplaceAll gives the same expected output.
+func TestReplace(t *testing.T) {
+	for i := range ReplaceTests {
+		c := ReplaceTests[i]
+		if got := Replace(c.in, c.old, c.new, c.n); got != c.out {
+			t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", c.in, c.old, c.new, c.n, got, c.out)
+		}
+		if c.n != -1 {
+			continue
+		}
+		if got := ReplaceAll(c.in, c.old, c.new); got != c.out {
+			t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", c.in, c.old, c.new, got, c.out)
+		}
+	}
+}
+
+// TitleTests lists inputs and expected results for Title.
+var TitleTests = []struct {
+ in, out string
+}{
+ {"", ""},
+ {"a", "A"},
+ {" aaa aaa aaa ", " Aaa Aaa Aaa "},
+ {" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
+ {"123a456", "123a456"},
+ {"double-blind", "Double-Blind"},
+ {"ÿøû", "Ÿøû"},
+ {"with_underscore", "With_underscore"},
+ {"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
+}
+
+// TestTitle checks Title against every entry of TitleTests.
+func TestTitle(t *testing.T) {
+	for i := range TitleTests {
+		in, want := TitleTests[i].in, TitleTests[i].out
+		got := Title(in)
+		if got == want {
+			continue
+		}
+		t.Errorf("Title(%q) = %q, want %q", in, got, want)
+	}
+}
+
+// ContainsTests lists, for Contains, the string str, the substring substr,
+// and the expected result. The entries are grouped by needle length, with
+// the needle at the start, middle and end of the haystack and just outside
+// a sliced haystack.
+var ContainsTests = []struct {
+ str, substr string
+ expected bool
+}{
+ {"abc", "bc", true},
+ {"abc", "bcd", false},
+ {"abc", "", true},
+ {"", "a", false},
+
+ // cases to cover code in runtime/asm_amd64.s:indexShortStr
+ // 2-byte needle
+ {"xxxxxx", "01", false},
+ {"01xxxx", "01", true},
+ {"xx01xx", "01", true},
+ {"xxxx01", "01", true},
+ {"01xxxxx"[1:], "01", false},
+ {"xxxxx01"[:6], "01", false},
+ // 3-byte needle
+ {"xxxxxxx", "012", false},
+ {"012xxxx", "012", true},
+ {"xx012xx", "012", true},
+ {"xxxx012", "012", true},
+ {"012xxxxx"[1:], "012", false},
+ {"xxxxx012"[:7], "012", false},
+ // 4-byte needle
+ {"xxxxxxxx", "0123", false},
+ {"0123xxxx", "0123", true},
+ {"xx0123xx", "0123", true},
+ {"xxxx0123", "0123", true},
+ {"0123xxxxx"[1:], "0123", false},
+ {"xxxxx0123"[:8], "0123", false},
+ // 5-7-byte needle
+ {"xxxxxxxxx", "01234", false},
+ {"01234xxxx", "01234", true},
+ {"xx01234xx", "01234", true},
+ {"xxxx01234", "01234", true},
+ {"01234xxxxx"[1:], "01234", false},
+ {"xxxxx01234"[:9], "01234", false},
+ // 8-byte needle
+ {"xxxxxxxxxxxx", "01234567", false},
+ {"01234567xxxx", "01234567", true},
+ {"xx01234567xx", "01234567", true},
+ {"xxxx01234567", "01234567", true},
+ {"01234567xxxxx"[1:], "01234567", false},
+ {"xxxxx01234567"[:12], "01234567", false},
+ // 9-15-byte needle
+ {"xxxxxxxxxxxxx", "012345678", false},
+ {"012345678xxxx", "012345678", true},
+ {"xx012345678xx", "012345678", true},
+ {"xxxx012345678", "012345678", true},
+ {"012345678xxxxx"[1:], "012345678", false},
+ {"xxxxx012345678"[:13], "012345678", false},
+ // 16-byte needle
+ {"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
+ {"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
+ {"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
+ {"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
+ {"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
+ {"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
+ // 17-31-byte needle
+ {"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
+ {"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
+ {"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
+ {"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
+ {"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
+ {"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},
+
+ // partial match cases
+ {"xx01x", "012", false}, // 3
+ {"xx0123x", "01234", false}, // 5-7
+ {"xx01234567x", "012345678", false}, // 9-15
+ {"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
+}
+
+// TestContains checks Contains against every entry of ContainsTests.
+//
+// Operands are printed with %q so that empty strings remain visible in the
+// failure message, and the value Contains actually returned is printed.
+func TestContains(t *testing.T) {
+	for _, ct := range ContainsTests {
+		if got := Contains(ct.str, ct.substr); got != ct.expected {
+			t.Errorf("Contains(%q, %q) = %v, want %v",
+				ct.str, ct.substr, got, ct.expected)
+		}
+	}
+}
+
+// ContainsAnyTests lists, for ContainsAny, the string str, the set of
+// characters substr, and the expected result.
+var ContainsAnyTests = []struct {
+ str, substr string
+ expected bool
+}{
+ {"", "", false},
+ {"", "a", false},
+ {"", "abc", false},
+ {"a", "", false},
+ {"a", "a", true},
+ {"aaa", "a", true},
+ {"abc", "xyz", false},
+ {"abc", "xcz", true},
+ {"a☺b☻c☹d", "uvw☻xyz", true},
+ {"aRegExp*", ".(|)*+?^$[]", true},
+ {dots + dots + dots, " ", false},
+}
+
+// TestContainsAny checks ContainsAny against every entry of ContainsAnyTests.
+//
+// Operands are printed with %q so that empty strings and white space remain
+// visible in the failure message, and the value ContainsAny actually
+// returned is printed.
+func TestContainsAny(t *testing.T) {
+	for _, ct := range ContainsAnyTests {
+		if got := ContainsAny(ct.str, ct.substr); got != ct.expected {
+			t.Errorf("ContainsAny(%q, %q) = %v, want %v",
+				ct.str, ct.substr, got, ct.expected)
+		}
+	}
+}
+
+// ContainsRuneTests lists the string str, the rune r, and whether r is
+// expected to be found in str; it is used by TestContainsRune and
+// TestContainsFunc.
+var ContainsRuneTests = []struct {
+ str string
+ r rune
+ expected bool
+}{
+ {"", 'a', false},
+ {"a", 'a', true},
+ {"aaa", 'a', true},
+ {"abc", 'y', false},
+ {"abc", 'c', true},
+ {"a☺b☻c☹d", 'x', false},
+ {"a☺b☻c☹d", '☻', true},
+ {"aRegExp*", '*', true},
+}
+
+// TestContainsRune checks ContainsRune against every entry of
+// ContainsRuneTests.
+func TestContainsRune(t *testing.T) {
+	for i := range ContainsRuneTests {
+		c := ContainsRuneTests[i]
+		got := ContainsRune(c.str, c.r)
+		if got == c.expected {
+			continue
+		}
+		t.Errorf("ContainsRune(%q, %q) = %v, want %v",
+			c.str, c.r, got, c.expected)
+	}
+}
+
+// TestContainsFunc checks ContainsFunc against ContainsRuneTests, using a
+// predicate that matches exactly the rune of each entry.
+func TestContainsFunc(t *testing.T) {
+	for i := range ContainsRuneTests {
+		c := ContainsRuneTests[i]
+		matches := func(r rune) bool {
+			return c.r == r
+		}
+		got := ContainsFunc(c.str, matches)
+		if got == c.expected {
+			continue
+		}
+		t.Errorf("ContainsFunc(%q, func(%q)) = %v, want %v",
+			c.str, c.r, got, c.expected)
+	}
+}
+
+// EqualFoldTests lists string pairs s and t and the result expected from
+// EqualFold; TestEqualFold checks each pair in both argument orders.
+var EqualFoldTests = []struct {
+ s, t string
+ out bool
+}{
+ {"abc", "abc", true},
+ {"ABcd", "ABcd", true},
+ {"123abc", "123ABC", true},
+ {"αβδ", "ΑΒΔ", true},
+ {"abc", "xyz", false},
+ {"abc", "XYZ", false},
+ {"abcdefghijk", "abcdefghijX", false},
+ {"abcdefghijk", "abcdefghij\u212A", true},
+ {"abcdefghijK", "abcdefghij\u212A", true},
+ {"abcdefghijkz", "abcdefghij\u212Ay", false},
+ {"abcdefghijKz", "abcdefghij\u212Ay", false},
+ {"1", "2", false},
+ {"utf-8", "US-ASCII", false},
+}
+
+// TestEqualFold checks every entry of EqualFoldTests in both argument
+// orders, first (s, t) and then (t, s).
+func TestEqualFold(t *testing.T) {
+	for _, tc := range EqualFoldTests {
+		orders := [2][2]string{
+			{tc.s, tc.t},
+			{tc.t, tc.s},
+		}
+		for _, args := range orders {
+			got := EqualFold(args[0], args[1])
+			if got == tc.out {
+				continue
+			}
+			t.Errorf("EqualFold(%#q, %#q) = %v, want %v", args[0], args[1], got, tc.out)
+		}
+	}
+}
+
+// BenchmarkEqualFold measures EqualFold in four sub-benchmarks: one pass
+// over the whole EqualFoldTests table, and a fixed pair of ASCII strings
+// compared bare ("ASCII"), behind a Greek prefix ("UnicodePrefix"), and in
+// front of a Greek suffix ("UnicodeSuffix").
+func BenchmarkEqualFold(b *testing.B) {
+ b.Run("Tests", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ for _, tt := range EqualFoldTests {
+ // The result is also verified, so a wrong answer aborts the benchmark.
+ if out := EqualFold(tt.s, tt.t); out != tt.out {
+ b.Fatal("wrong result")
+ }
+ }
+ }
+ })
+
+ // s1 and s2 differ only in the case of some letters.
+ const s1 = "abcdefghijKz"
+ const s2 = "abcDefGhijKz"
+
+ b.Run("ASCII", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ EqualFold(s1, s2)
+ }
+ })
+
+ b.Run("UnicodePrefix", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ EqualFold("αβδ"+s1, "ΑΒΔ"+s2)
+ }
+ })
+
+ b.Run("UnicodeSuffix", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ EqualFold(s1+"αβδ", s2+"ΑΒΔ")
+ }
+ })
+}
+
+// CountTests is the table driving TestCount: s is the string searched, sep
+// the substring counted, and num the required count.
+var CountTests = []struct {
+	s   string
+	sep string
+	num int
+}{
+	// Empty operands.
+	{s: "", sep: "", num: 1},
+	{s: "", sep: "notempty", num: 0},
+	{s: "notempty", sep: "", num: 9},
+	// Separator longer than the string.
+	{s: "smaller", sep: "not smaller", num: 0},
+	// Single-byte separators.
+	{s: "12345678987654321", sep: "6", num: 2},
+	{s: "611161116", sep: "6", num: 3},
+	// Separator as long as the string.
+	{s: "notequal", sep: "NotEqual", num: 0},
+	{s: "equal", sep: "equal", num: 1},
+	// Multi-byte separators; the last entry expects only 2 matches of "11"
+	// in "11111".
+	{s: "abc1231231123q", sep: "123", num: 3},
+	{s: "11111", sep: "11", num: 2},
+}
+
+// TestCount checks Count against every entry of CountTests.
+func TestCount(t *testing.T) {
+	for _, tc := range CountTests {
+		got := Count(tc.s, tc.sep)
+		if got == tc.num {
+			continue
+		}
+		t.Errorf("Count(%q, %q) = %d, want %d", tc.s, tc.sep, got, tc.num)
+	}
+}
+
+// cutTests is the table driving TestCut: Cut(s, sep) is required to return
+// exactly (before, after, found).
+var cutTests = []struct {
+	s      string
+	sep    string
+	before string
+	after  string
+	found  bool
+}{
+	// Separator in the middle, at the start, at the end, and spanning s.
+	{s: "abc", sep: "b", before: "a", after: "c", found: true},
+	{s: "abc", sep: "a", before: "", after: "bc", found: true},
+	{s: "abc", sep: "c", before: "ab", after: "", found: true},
+	{s: "abc", sep: "abc", before: "", after: "", found: true},
+	// Empty separator.
+	{s: "abc", sep: "", before: "", after: "abc", found: true},
+	// Separator absent: the whole input is expected back in before.
+	{s: "abc", sep: "d", before: "abc", after: "", found: false},
+	// Empty input.
+	{s: "", sep: "d", before: "", after: "", found: false},
+	{s: "", sep: "", before: "", after: "", found: true},
+}
+
+// TestCut checks all three results of Cut against every entry of cutTests.
+func TestCut(t *testing.T) {
+	for _, tc := range cutTests {
+		before, after, found := Cut(tc.s, tc.sep)
+		if before == tc.before && after == tc.after && found == tc.found {
+			continue
+		}
+		t.Errorf("Cut(%q, %q) = %q, %q, %v, want %q, %q, %v", tc.s, tc.sep, before, after, found, tc.before, tc.after, tc.found)
+	}
+}
+
+// cutPrefixTests is the table driving TestCutPrefix: CutPrefix(s, sep) is
+// required to return exactly (after, found).
+var cutPrefixTests = []struct {
+	s     string
+	sep   string
+	after string
+	found bool
+}{
+	{s: "abc", sep: "a", after: "bc", found: true},
+	{s: "abc", sep: "abc", after: "", found: true},
+	{s: "abc", sep: "", after: "abc", found: true},
+	// Prefix absent: the input is expected back unchanged.
+	{s: "abc", sep: "d", after: "abc", found: false},
+	{s: "", sep: "d", after: "", found: false},
+	{s: "", sep: "", after: "", found: true},
+}
+
+// TestCutPrefix checks both results of CutPrefix against every entry of
+// cutPrefixTests.
+func TestCutPrefix(t *testing.T) {
+	for _, tc := range cutPrefixTests {
+		after, found := CutPrefix(tc.s, tc.sep)
+		if after == tc.after && found == tc.found {
+			continue
+		}
+		t.Errorf("CutPrefix(%q, %q) = %q, %v, want %q, %v", tc.s, tc.sep, after, found, tc.after, tc.found)
+	}
+}
+
+// cutSuffixTests is the table driving TestCutSuffix: CutSuffix(s, sep) is
+// required to return exactly (before, found).
+var cutSuffixTests = []struct {
+	s      string
+	sep    string
+	before string
+	found  bool
+}{
+	{s: "abc", sep: "bc", before: "a", found: true},
+	{s: "abc", sep: "abc", before: "", found: true},
+	{s: "abc", sep: "", before: "abc", found: true},
+	// Suffix absent: the input is expected back unchanged.
+	{s: "abc", sep: "d", before: "abc", found: false},
+	{s: "", sep: "d", before: "", found: false},
+	{s: "", sep: "", before: "", found: true},
+}
+
+// TestCutSuffix checks both results of CutSuffix against every entry of
+// cutSuffixTests.
+func TestCutSuffix(t *testing.T) {
+	for _, tc := range cutSuffixTests {
+		before, found := CutSuffix(tc.s, tc.sep)
+		if before == tc.before && found == tc.found {
+			continue
+		}
+		t.Errorf("CutSuffix(%q, %q) = %q, %v, want %q, %v", tc.s, tc.sep, before, found, tc.before, tc.found)
+	}
+}
+
+func makeBenchInputHard() string {
+ tokens := [...]string{
+ "<a>", "<p>", "<b>", "<strong>",
+ "</a>", "</p>", "</b>", "</strong>",
+ "hello", "world",
+ }
+ x := make([]byte, 0, 1<<20)
+ for {
+ i := rand.Intn(len(tokens))
+ if len(x)+len(tokens[i]) >= 1<<20 {
+ break
+ }
+ x = append(x, tokens[i]...)
+ }
+ return string(x)
+}
+
+// benchInputHard is the shared haystack for the *Hard and Split benchmarks;
+// it is built once, at package initialization, by makeBenchInputHard.
+var benchInputHard = makeBenchInputHard()
+
+// benchmarkIndexHard runs Index(benchInputHard, sep) b.N times.
+func benchmarkIndexHard(b *testing.B, sep string) {
+ for i := 0; i < b.N; i++ {
+ Index(benchInputHard, sep)
+ }
+}
+
+// benchmarkLastIndexHard runs LastIndex(benchInputHard, sep) b.N times.
+func benchmarkLastIndexHard(b *testing.B, sep string) {
+ for i := 0; i < b.N; i++ {
+ LastIndex(benchInputHard, sep)
+ }
+}
+
+// benchmarkCountHard runs Count(benchInputHard, sep) b.N times.
+func benchmarkCountHard(b *testing.B, sep string) {
+ for i := 0; i < b.N; i++ {
+ Count(benchInputHard, sep)
+ }
+}
+
+// BenchmarkIndexHard1 through BenchmarkIndexHard4 search benchInputHard for
+// needles of increasing length. None of the needles can occur in the input:
+// the token list in makeBenchInputHard has no "pre" tag, no space, and no
+// '<' directly followed by '>'.
+func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
+func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
+func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
+func BenchmarkIndexHard4(b *testing.B) {
+ benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
+}
+
+// BenchmarkLastIndexHard1 through BenchmarkLastIndexHard3 run LastIndex over
+// benchInputHard with the same first three needles as the BenchmarkIndexHard
+// family.
+func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
+func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
+func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }
+
+// BenchmarkCountHard1 through BenchmarkCountHard3 run Count over
+// benchInputHard with the same first three needles as the BenchmarkIndexHard
+// family.
+func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
+func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
+func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
+
+// benchInputTorture is 1024 copies of "ABC", then "123", then another 1024
+// copies of "ABC". benchNeedleTorture is 1025 copies of "ABC" (1<<10+1
+// parses as (1<<10)+1), one repetition longer than either run in the input,
+// so the needle never matches.
+var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
+var benchNeedleTorture = Repeat("ABC", 1<<10+1)
+
+// BenchmarkIndexTorture measures Index with benchNeedleTorture searched for
+// in benchInputTorture.
+func BenchmarkIndexTorture(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Index(benchInputTorture, benchNeedleTorture)
+ }
+}
+
+// BenchmarkCountTorture measures Count with benchNeedleTorture counted in
+// benchInputTorture.
+func BenchmarkCountTorture(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Count(benchInputTorture, benchNeedleTorture)
+ }
+}
+
+// BenchmarkCountTortureOverlapping measures Count where the haystack is
+// 1<<20 copies of "ABC" and the needle is 1<<10 copies of "ABC", so a match
+// can start at every multiple of three bytes. Both strings are built inside
+// the benchmark function, before the timed loop.
+func BenchmarkCountTortureOverlapping(b *testing.B) {
+ A := Repeat("ABC", 1<<20)
+ B := Repeat("ABC", 1<<10)
+ for i := 0; i < b.N; i++ {
+ Count(A, B)
+ }
+}
+
+// BenchmarkCountByte measures Count with the single-byte separator "=" over
+// prefixes of a repeated benchmarkString, one sub-benchmark per entry of
+// indexSizes, named after the prefix length in bytes.
+func BenchmarkCountByte(b *testing.B) {
+ indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
+ // Repeat benchmarkString (declared elsewhere in this file) enough times,
+ // rounding up, to cover the largest size in indexSizes.
+ benchStr := Repeat(benchmarkString,
+ (indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
+ // benchFunc's parameter shadows the outer benchStr; it receives the prefix
+ // under test and reports its length as the bytes processed per iteration.
+ benchFunc := func(b *testing.B, benchStr string) {
+ b.SetBytes(int64(len(benchStr)))
+ for i := 0; i < b.N; i++ {
+ Count(benchStr, "=")
+ }
+ }
+ for _, size := range indexSizes {
+ b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
+ benchFunc(b, benchStr[:size])
+ })
+ }
+
+}
+
+// makeFieldsInput is a function value that builds a random 1<<20-byte
+// string made of spaces, the ASCII letter 'x', and occasional two-byte "χ"
+// characters.
+var makeFieldsInput = func() string {
+ x := make([]byte, 1<<20)
+ // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
+ for i := range x {
+ switch rand.Intn(10) {
+ case 0:
+ x[i] = ' '
+ case 1:
+ // If the previous byte is an 'x', overwrite it and the current byte
+ // with the two-byte encoding of "χ" and leave the switch; otherwise
+ // fall through and store a plain 'x'.
+ if i > 0 && x[i-1] == 'x' {
+ copy(x[i-1:], "χ")
+ break
+ }
+ fallthrough
+ default:
+ x[i] = 'x'
+ }
+ }
+ return string(x)
+}
+
+// makeFieldsInputASCII is a function value that builds a random 1<<20-byte
+// ASCII string: each byte is a space with probability 1/10 and 'x'
+// otherwise.
+var makeFieldsInputASCII = func() string {
+	buf := make([]byte, 1<<20)
+	for i := range buf {
+		c := byte('x')
+		if rand.Intn(10) == 0 {
+			c = ' '
+		}
+		buf[i] = c
+	}
+	return string(buf)
+}
+
+// stringdata holds the named inputs shared by BenchmarkFields and
+// BenchmarkFieldsFunc; both inputs are generated once, at package
+// initialization.
+var stringdata = []struct {
+	name string
+	data string
+}{
+	{name: "ASCII", data: makeFieldsInputASCII()},
+	{name: "Mixed", data: makeFieldsInput()},
+}
+
+// BenchmarkFields measures Fields on prefixes of each stringdata input.
+// Sub-benchmarks are nested as <input name>/<prefix length in bytes>.
+func BenchmarkFields(b *testing.B) {
+ for _, sd := range stringdata {
+ b.Run(sd.name, func(b *testing.B) {
+ // Prefix lengths grow by a factor of 16: 1<<4, 1<<8, ..., 1<<20.
+ for j := 1 << 4; j <= 1<<20; j <<= 4 {
+ b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+ b.ReportAllocs()
+ b.SetBytes(int64(j))
+ data := sd.data[:j]
+ for i := 0; i < b.N; i++ {
+ Fields(data)
+ }
+ })
+ }
+ })
+ }
+}
+
+// BenchmarkFieldsFunc measures FieldsFunc with unicode.IsSpace as the
+// predicate, on prefixes of each stringdata input. Sub-benchmarks are nested
+// as <input name>/<prefix length in bytes>.
+func BenchmarkFieldsFunc(b *testing.B) {
+ for _, sd := range stringdata {
+ b.Run(sd.name, func(b *testing.B) {
+ // Prefix lengths grow by a factor of 16: 1<<4, 1<<8, ..., 1<<20.
+ for j := 1 << 4; j <= 1<<20; j <<= 4 {
+ b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+ b.ReportAllocs()
+ b.SetBytes(int64(j))
+ data := sd.data[:j]
+ for i := 0; i < b.N; i++ {
+ FieldsFunc(data, unicode.IsSpace)
+ }
+ })
+ }
+ })
+ }
+}
+
+// BenchmarkSplitEmptySeparator measures Split on benchInputHard with an
+// empty separator.
+func BenchmarkSplitEmptySeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Split(benchInputHard, "")
+ }
+}
+
+// BenchmarkSplitSingleByteSeparator measures Split on benchInputHard with
+// the one-byte separator "/".
+func BenchmarkSplitSingleByteSeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Split(benchInputHard, "/")
+ }
+}
+
+// BenchmarkSplitMultiByteSeparator measures Split on benchInputHard with
+// the multi-byte separator "hello".
+func BenchmarkSplitMultiByteSeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Split(benchInputHard, "hello")
+ }
+}
+
+// BenchmarkSplitNSingleByteSeparator measures SplitN on benchInputHard with
+// the one-byte separator "/" and a limit argument of 10.
+func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ SplitN(benchInputHard, "/", 10)
+ }
+}
+
+// BenchmarkSplitNMultiByteSeparator measures SplitN on benchInputHard with
+// the multi-byte separator "hello" and a limit argument of 10.
+func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ SplitN(benchInputHard, "hello", 10)
+ }
+}
+
+// BenchmarkRepeat measures Repeat for short inputs. Sub-benchmarks are named
+// "<n>x<c>", where n is the input length in bytes (5 or 10) and c is the
+// repeat count (0, 1, 2 or 6).
+func BenchmarkRepeat(b *testing.B) {
+ s := "0123456789"
+ for _, n := range []int{5, 10} {
+ for _, c := range []int{0, 1, 2, 6} {
+ b.Run(fmt.Sprintf("%dx%d", n, c), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Repeat(s[:n], c)
+ }
+ })
+ }
+ }
+}
+
+// BenchmarkRepeatLarge measures Repeat for outputs of roughly 1<<j bytes,
+// with j from 8 to 30, built from inputs of k bytes, with k in {1, 16, 4097}.
+// Sub-benchmarks are named "<1<<j>/<k>".
+func BenchmarkRepeatLarge(b *testing.B) {
+ s := Repeat("@", 8*1024)
+ for j := 8; j <= 30; j++ {
+ for _, k := range []int{1, 16, 4097} {
+ // Shadow s with its first k bytes for this combination.
+ s := s[:k]
+ // n is the repeat count that brings the output as close to 1<<j
+ // bytes as integer division allows; combinations where the input is
+ // already longer than the target are skipped.
+ n := (1 << j) / k
+ if n == 0 {
+ continue
+ }
+ b.Run(fmt.Sprintf("%d/%d", 1<<j, k), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ Repeat(s, n)
+ }
+ // Record the output size as the bytes processed per iteration.
+ b.SetBytes(int64(n * len(s)))
+ })
+ }
+ }
+}
+
+// BenchmarkIndexAnyASCII measures IndexAny with an ASCII character set that
+// never matches the input. Sub-benchmarks are named "<k>:<j>", where k is
+// the input length (1, 16, 256) and j the character-set length (1, 2, 4,
+// ..., 64), both in bytes.
+func BenchmarkIndexAnyASCII(b *testing.B) {
+ x := Repeat("#", 2048) // Never matches set
+ cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ IndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+// BenchmarkIndexAnyUTF8 measures IndexAny with a character set that mixes
+// multi-byte and ASCII characters and never matches the input.
+// Sub-benchmarks are named "<k>:<j>", where k is the input length (1, 16,
+// 256) and j the character-set length (1, 2, 4, ..., 64), both in bytes.
+func BenchmarkIndexAnyUTF8(b *testing.B) {
+ x := Repeat("#", 2048) // Never matches set
+ cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ // NOTE(review): cs[:j] slices by byte, so some prefixes end in
+ // the middle of a multi-byte character; presumably intentional.
+ IndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+// BenchmarkLastIndexAnyASCII measures LastIndexAny with an ASCII character
+// set that never matches the input. Sub-benchmarks are named "<k>:<j>",
+// where k is the input length (1, 16, 256) and j the character-set length
+// (1, 2, 4, ..., 64), both in bytes.
+func BenchmarkLastIndexAnyASCII(b *testing.B) {
+ x := Repeat("#", 2048) // Never matches set
+ cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ LastIndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+// BenchmarkLastIndexAnyUTF8 measures LastIndexAny with a character set that
+// mixes multi-byte and ASCII characters and never matches the input.
+// Sub-benchmarks are named "<k>:<j>", where k is the input length (1, 16,
+// 256) and j the character-set length (1, 2, 4, ..., 64), both in bytes.
+func BenchmarkLastIndexAnyUTF8(b *testing.B) {
+ x := Repeat("#", 2048) // Never matches set
+ cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
+ for k := 1; k <= 2048; k <<= 4 {
+ for j := 1; j <= 64; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ // NOTE(review): cs[:j] slices by byte, so some prefixes end in
+ // the middle of a multi-byte character; presumably intentional.
+ LastIndexAny(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+// BenchmarkTrimASCII measures Trim when every byte of the input belongs to
+// the cutset. Sub-benchmarks are named "<k>:<j>", where k is the input
+// length (1, 16, 256, 4096) and j the cutset length (1, 2, 4, 8, 16), both
+// in bytes.
+func BenchmarkTrimASCII(b *testing.B) {
+ cs := "0123456789abcdef"
+ for k := 1; k <= 4096; k <<= 4 {
+ for j := 1; j <= 16; j <<= 1 {
+ b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+ // x is j*k bytes long; only its first k bytes are trimmed below.
+ x := Repeat(cs[:j], k) // Always matches set
+ for i := 0; i < b.N; i++ {
+ Trim(x[:k], cs[:j])
+ }
+ })
+ }
+ }
+}
+
+// BenchmarkTrimByte measures Trim with a single-byte cutset (" ") on a short
+// phrase that has a space at each end.
+func BenchmarkTrimByte(b *testing.B) {
+ x := " the quick brown fox "
+ for i := 0; i < b.N; i++ {
+ Trim(x, " ")
+ }
+}
+
+// BenchmarkIndexPeriodic measures Index searching for "aa" in an input where
+// 'a' recurs every skip bytes, for several values of skip.
+func BenchmarkIndexPeriodic(b *testing.B) {
+ key := "aa"
+ for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
+ b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
+ // Every 'a' is followed by skip-1 spaces, so key never occurs in s;
+ // the total length is 1<<16 bytes for each skip value.
+ s := Repeat("a"+Repeat(" ", skip-1), 1<<16/skip)
+ for i := 0; i < b.N; i++ {
+ Index(s, key)
+ }
+ })
+ }
+}
+
+// BenchmarkJoin measures Join with the separator " and " on the first l
+// elements of a fixed word list, for every l from 0 to the full length.
+// Sub-benchmarks are named after l and report allocations.
+func BenchmarkJoin(b *testing.B) {
+ vals := []string{"red", "yellow", "pink", "green", "purple", "orange", "blue"}
+ for l := 0; l <= len(vals); l++ {
+ b.Run(strconv.Itoa(l), func(b *testing.B) {
+ b.ReportAllocs()
+ // Shadow vals with the prefix under test.
+ vals := vals[:l]
+ for i := 0; i < b.N; i++ {
+ Join(vals, " and ")
+ }
+ })
+ }
+}
+
+// BenchmarkTrimSpace measures TrimSpace on four named inputs: one with no
+// surrounding spaces, one with ASCII spaces only, one mixing ASCII
+// whitespace with \u2000 and \u3000, and one whose only surrounding
+// characters are \u2000 and \u3000.
+func BenchmarkTrimSpace(b *testing.B) {
+ tests := []struct{ name, input string }{
+ {"NoTrim", "typical"},
+ {"ASCII", " foo bar "},
+ {"SomeNonASCII", " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
+ {"JustNonASCII", "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
+ }
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ TrimSpace(test.input)
+ }
+ })
+ }
+}
+
+// stringSink is a package-level variable that BenchmarkReplaceAll assigns
+// its result to on every iteration.
+var stringSink string
+
+// BenchmarkReplaceAll measures ReplaceAll replacing every "a" in "banana"
+// with "<>", and reports allocations.
+func BenchmarkReplaceAll(b *testing.B) {
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ // Store the result in a package-level variable.
+ stringSink = ReplaceAll("banana", "a", "<>")
+ }
+}