summaryrefslogtreecommitdiffstats
path: root/src/strings/replace.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 13:14:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 13:14:23 +0000
commit73df946d56c74384511a194dd01dbe099584fd1a (patch)
treefd0bcea490dd81327ddfbb31e215439672c9a068 /src/strings/replace.go
parentInitial commit. (diff)
downloadgolang-1.16-73df946d56c74384511a194dd01dbe099584fd1a.tar.xz
golang-1.16-73df946d56c74384511a194dd01dbe099584fd1a.zip
Adding upstream version 1.16.10.upstream/1.16.10upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/strings/replace.go')
-rw-r--r--src/strings/replace.go568
1 files changed, 568 insertions, 0 deletions
diff --git a/src/strings/replace.go b/src/strings/replace.go
new file mode 100644
index 0000000..e28d428
--- /dev/null
+++ b/src/strings/replace.go
@@ -0,0 +1,568 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+import (
+ "io"
+ "sync"
+)
+
+// Replacer replaces a list of strings with replacements.
+// It is safe for concurrent use by multiple goroutines.
+type Replacer struct {
+ once sync.Once // guards buildOnce method
+ r replacer
+ oldnew []string
+}
+
+// replacer is the interface that a replacement algorithm needs to implement.
+type replacer interface {
+ Replace(s string) string
+ WriteString(w io.Writer, s string) (n int, err error)
+}
+
+// NewReplacer returns a new Replacer from a list of old, new string
+// pairs. Replacements are performed in the order they appear in the
+// target string, without overlapping matches. The old string
+// comparisons are done in argument order.
+//
+// NewReplacer panics if given an odd number of arguments.
+func NewReplacer(oldnew ...string) *Replacer {
+ if len(oldnew)%2 == 1 {
+ panic("strings.NewReplacer: odd argument count")
+ }
+ return &Replacer{oldnew: append([]string(nil), oldnew...)}
+}
+
+func (r *Replacer) buildOnce() {
+ r.r = r.build()
+ r.oldnew = nil
+}
+
+func (b *Replacer) build() replacer {
+ oldnew := b.oldnew
+ if len(oldnew) == 2 && len(oldnew[0]) > 1 {
+ return makeSingleStringReplacer(oldnew[0], oldnew[1])
+ }
+
+ allNewBytes := true
+ for i := 0; i < len(oldnew); i += 2 {
+ if len(oldnew[i]) != 1 {
+ return makeGenericReplacer(oldnew)
+ }
+ if len(oldnew[i+1]) != 1 {
+ allNewBytes = false
+ }
+ }
+
+ if allNewBytes {
+ r := byteReplacer{}
+ for i := range r {
+ r[i] = byte(i)
+ }
+ // The first occurrence of old->new map takes precedence
+ // over the others with the same old string.
+ for i := len(oldnew) - 2; i >= 0; i -= 2 {
+ o := oldnew[i][0]
+ n := oldnew[i+1][0]
+ r[o] = n
+ }
+ return &r
+ }
+
+ r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
+ // The first occurrence of old->new map takes precedence
+ // over the others with the same old string.
+ for i := len(oldnew) - 2; i >= 0; i -= 2 {
+ o := oldnew[i][0]
+ n := oldnew[i+1]
+ // To avoid counting repetitions multiple times.
+ if r.replacements[o] == nil {
+ // We need to use string([]byte{o}) instead of string(o),
+ // to avoid utf8 encoding of o.
+ // E. g. byte(150) produces string of length 2.
+ r.toReplace = append(r.toReplace, string([]byte{o}))
+ }
+ r.replacements[o] = []byte(n)
+
+ }
+ return &r
+}
+
+// Replace returns a copy of s with all replacements performed.
+func (r *Replacer) Replace(s string) string {
+ r.once.Do(r.buildOnce)
+ return r.r.Replace(s)
+}
+
+// WriteString writes s to w with all replacements performed.
+func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
+ r.once.Do(r.buildOnce)
+ return r.r.WriteString(w, s)
+}
+
+// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
+// and values may be empty. For example, the trie containing keys "ax", "ay",
+// "bcbc", "x" and "xy" could have eight nodes:
+//
+// n0 -
+// n1 a-
+// n2 .x+
+// n3 .y+
+// n4 b-
+// n5 .cbc+
+// n6 x+
+// n7 .y+
+//
+// n0 is the root node, and its children are n1, n4 and n6; n1's children are
+// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
+// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
+// (marked with a trailing "+") are complete keys.
+type trieNode struct {
+ // value is the value of the trie node's key/value pair. It is empty if
+ // this node is not a complete key.
+ value string
+ // priority is the priority (higher is more important) of the trie node's
+ // key/value pair; keys are not necessarily matched shortest- or longest-
+ // first. Priority is positive if this node is a complete key, and zero
+ // otherwise. In the example above, positive/zero priorities are marked
+ // with a trailing "+" or "-".
+ priority int
+
+ // A trie node may have zero, one or more child nodes:
+ // * if the remaining fields are zero, there are no children.
+ // * if prefix and next are non-zero, there is one child in next.
+ // * if table is non-zero, it defines all the children.
+ //
+ // Prefixes are preferred over tables when there is one child, but the
+ // root node always uses a table for lookup efficiency.
+
+ // prefix is the difference in keys between this trie node and the next.
+ // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
+ // Node n5 has no children and so has zero prefix, next and table fields.
+ prefix string
+ next *trieNode
+
+ // table is a lookup table indexed by the next byte in the key, after
+ // remapping that byte through genericReplacer.mapping to create a dense
+ // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
+ // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
+ // genericReplacer.tableSize will be 5. Node n0's table will be
+ // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
+ // 'a', 'b' and 'x'.
+ table []*trieNode
+}
+
+func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
+ if key == "" {
+ if t.priority == 0 {
+ t.value = val
+ t.priority = priority
+ }
+ return
+ }
+
+ if t.prefix != "" {
+ // Need to split the prefix among multiple nodes.
+ var n int // length of the longest common prefix
+ for ; n < len(t.prefix) && n < len(key); n++ {
+ if t.prefix[n] != key[n] {
+ break
+ }
+ }
+ if n == len(t.prefix) {
+ t.next.add(key[n:], val, priority, r)
+ } else if n == 0 {
+ // First byte differs, start a new lookup table here. Looking up
+ // what is currently t.prefix[0] will lead to prefixNode, and
+ // looking up key[0] will lead to keyNode.
+ var prefixNode *trieNode
+ if len(t.prefix) == 1 {
+ prefixNode = t.next
+ } else {
+ prefixNode = &trieNode{
+ prefix: t.prefix[1:],
+ next: t.next,
+ }
+ }
+ keyNode := new(trieNode)
+ t.table = make([]*trieNode, r.tableSize)
+ t.table[r.mapping[t.prefix[0]]] = prefixNode
+ t.table[r.mapping[key[0]]] = keyNode
+ t.prefix = ""
+ t.next = nil
+ keyNode.add(key[1:], val, priority, r)
+ } else {
+ // Insert new node after the common section of the prefix.
+ next := &trieNode{
+ prefix: t.prefix[n:],
+ next: t.next,
+ }
+ t.prefix = t.prefix[:n]
+ t.next = next
+ next.add(key[n:], val, priority, r)
+ }
+ } else if t.table != nil {
+ // Insert into existing table.
+ m := r.mapping[key[0]]
+ if t.table[m] == nil {
+ t.table[m] = new(trieNode)
+ }
+ t.table[m].add(key[1:], val, priority, r)
+ } else {
+ t.prefix = key
+ t.next = new(trieNode)
+ t.next.add("", val, priority, r)
+ }
+}
+
+func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
+ // Iterate down the trie to the end, and grab the value and keylen with
+ // the highest priority.
+ bestPriority := 0
+ node := &r.root
+ n := 0
+ for node != nil {
+ if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
+ bestPriority = node.priority
+ val = node.value
+ keylen = n
+ found = true
+ }
+
+ if s == "" {
+ break
+ }
+ if node.table != nil {
+ index := r.mapping[s[0]]
+ if int(index) == r.tableSize {
+ break
+ }
+ node = node.table[index]
+ s = s[1:]
+ n++
+ } else if node.prefix != "" && HasPrefix(s, node.prefix) {
+ n += len(node.prefix)
+ s = s[len(node.prefix):]
+ node = node.next
+ } else {
+ break
+ }
+ }
+ return
+}
+
+// genericReplacer is the fully generic algorithm.
+// It's used as a fallback when nothing faster can be used.
+type genericReplacer struct {
+ root trieNode
+ // tableSize is the size of a trie node's lookup table. It is the number
+ // of unique key bytes.
+ tableSize int
+ // mapping maps from key bytes to a dense index for trieNode.table.
+ mapping [256]byte
+}
+
+func makeGenericReplacer(oldnew []string) *genericReplacer {
+ r := new(genericReplacer)
+ // Find each byte used, then assign them each an index.
+ for i := 0; i < len(oldnew); i += 2 {
+ key := oldnew[i]
+ for j := 0; j < len(key); j++ {
+ r.mapping[key[j]] = 1
+ }
+ }
+
+ for _, b := range r.mapping {
+ r.tableSize += int(b)
+ }
+
+ var index byte
+ for i, b := range r.mapping {
+ if b == 0 {
+ r.mapping[i] = byte(r.tableSize)
+ } else {
+ r.mapping[i] = index
+ index++
+ }
+ }
+ // Ensure root node uses a lookup table (for performance).
+ r.root.table = make([]*trieNode, r.tableSize)
+
+ for i := 0; i < len(oldnew); i += 2 {
+ r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
+ }
+ return r
+}
+
+type appendSliceWriter []byte
+
+// Write writes to the buffer to satisfy io.Writer.
+func (w *appendSliceWriter) Write(p []byte) (int, error) {
+ *w = append(*w, p...)
+ return len(p), nil
+}
+
+// WriteString writes to the buffer without string->[]byte->string allocations.
+func (w *appendSliceWriter) WriteString(s string) (int, error) {
+ *w = append(*w, s...)
+ return len(s), nil
+}
+
+type stringWriter struct {
+ w io.Writer
+}
+
+func (w stringWriter) WriteString(s string) (int, error) {
+ return w.w.Write([]byte(s))
+}
+
+func getStringWriter(w io.Writer) io.StringWriter {
+ sw, ok := w.(io.StringWriter)
+ if !ok {
+ sw = stringWriter{w}
+ }
+ return sw
+}
+
+func (r *genericReplacer) Replace(s string) string {
+ buf := make(appendSliceWriter, 0, len(s))
+ r.WriteString(&buf, s)
+ return string(buf)
+}
+
+func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+ sw := getStringWriter(w)
+ var last, wn int
+ var prevMatchEmpty bool
+ for i := 0; i <= len(s); {
+ // Fast path: s[i] is not a prefix of any pattern.
+ if i != len(s) && r.root.priority == 0 {
+ index := int(r.mapping[s[i]])
+ if index == r.tableSize || r.root.table[index] == nil {
+ i++
+ continue
+ }
+ }
+
+ // Ignore the empty match iff the previous loop found the empty match.
+ val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
+ prevMatchEmpty = match && keylen == 0
+ if match {
+ wn, err = sw.WriteString(s[last:i])
+ n += wn
+ if err != nil {
+ return
+ }
+ wn, err = sw.WriteString(val)
+ n += wn
+ if err != nil {
+ return
+ }
+ i += keylen
+ last = i
+ continue
+ }
+ i++
+ }
+ if last != len(s) {
+ wn, err = sw.WriteString(s[last:])
+ n += wn
+ }
+ return
+}
+
+// singleStringReplacer is the implementation that's used when there is only
+// one string to replace (and that string has more than one byte).
+type singleStringReplacer struct {
+ finder *stringFinder
+ // value is the new string that replaces that pattern when it's found.
+ value string
+}
+
+func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
+ return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
+}
+
+func (r *singleStringReplacer) Replace(s string) string {
+ var buf []byte
+ i, matched := 0, false
+ for {
+ match := r.finder.next(s[i:])
+ if match == -1 {
+ break
+ }
+ matched = true
+ buf = append(buf, s[i:i+match]...)
+ buf = append(buf, r.value...)
+ i += match + len(r.finder.pattern)
+ }
+ if !matched {
+ return s
+ }
+ buf = append(buf, s[i:]...)
+ return string(buf)
+}
+
+func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+ sw := getStringWriter(w)
+ var i, wn int
+ for {
+ match := r.finder.next(s[i:])
+ if match == -1 {
+ break
+ }
+ wn, err = sw.WriteString(s[i : i+match])
+ n += wn
+ if err != nil {
+ return
+ }
+ wn, err = sw.WriteString(r.value)
+ n += wn
+ if err != nil {
+ return
+ }
+ i += match + len(r.finder.pattern)
+ }
+ wn, err = sw.WriteString(s[i:])
+ n += wn
+ return
+}
+
+// byteReplacer is the implementation that's used when all the "old"
+// and "new" values are single ASCII bytes.
+// The array contains replacement bytes indexed by old byte.
+type byteReplacer [256]byte
+
+func (r *byteReplacer) Replace(s string) string {
+ var buf []byte // lazily allocated
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r[b] != b {
+ if buf == nil {
+ buf = []byte(s)
+ }
+ buf[i] = r[b]
+ }
+ }
+ if buf == nil {
+ return s
+ }
+ return string(buf)
+}
+
+func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+ // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
+ bufsize := 32 << 10
+ if len(s) < bufsize {
+ bufsize = len(s)
+ }
+ buf := make([]byte, bufsize)
+
+ for len(s) > 0 {
+ ncopy := copy(buf, s)
+ s = s[ncopy:]
+ for i, b := range buf[:ncopy] {
+ buf[i] = r[b]
+ }
+ wn, err := w.Write(buf[:ncopy])
+ n += wn
+ if err != nil {
+ return n, err
+ }
+ }
+ return n, nil
+}
+
+// byteStringReplacer is the implementation that's used when all the
+// "old" values are single ASCII bytes but the "new" values vary in size.
+type byteStringReplacer struct {
+ // replacements contains replacement byte slices indexed by old byte.
+ // A nil []byte means that the old byte should not be replaced.
+ replacements [256][]byte
+ // toReplace keeps a list of bytes to replace. Depending on length of toReplace
+ // and length of target string it may be faster to use Count, or a plain loop.
+ // We store single byte as a string, because Count takes a string.
+ toReplace []string
+}
+
+// countCutOff controls the ratio of a string length to a number of replacements
+// at which (*byteStringReplacer).Replace switches algorithms.
+// For strings with higher ration of length to replacements than that value,
+// we call Count, for each replacement from toReplace.
+// For strings, with a lower ratio we use simple loop, because of Count overhead.
+// countCutOff is an empirically determined overhead multiplier.
+// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
+const countCutOff = 8
+
+func (r *byteStringReplacer) Replace(s string) string {
+ newSize := len(s)
+ anyChanges := false
+ // Is it faster to use Count?
+ if len(r.toReplace)*countCutOff <= len(s) {
+ for _, x := range r.toReplace {
+ if c := Count(s, x); c != 0 {
+ // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
+ newSize += c * (len(r.replacements[x[0]]) - 1)
+ anyChanges = true
+ }
+
+ }
+ } else {
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r.replacements[b] != nil {
+ // See above for explanation of -1
+ newSize += len(r.replacements[b]) - 1
+ anyChanges = true
+ }
+ }
+ }
+ if !anyChanges {
+ return s
+ }
+ buf := make([]byte, newSize)
+ j := 0
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r.replacements[b] != nil {
+ j += copy(buf[j:], r.replacements[b])
+ } else {
+ buf[j] = b
+ j++
+ }
+ }
+ return string(buf)
+}
+
+func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
+ sw := getStringWriter(w)
+ last := 0
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r.replacements[b] == nil {
+ continue
+ }
+ if last != i {
+ nw, err := sw.WriteString(s[last:i])
+ n += nw
+ if err != nil {
+ return n, err
+ }
+ }
+ last = i + 1
+ nw, err := w.Write(r.replacements[b])
+ n += nw
+ if err != nil {
+ return n, err
+ }
+ }
+ if last != len(s) {
+ var nw int
+ nw, err = sw.WriteString(s[last:])
+ n += nw
+ }
+ return
+}