Adding upstream version 1.21.8.upstream/1.21.8

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:19:13 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:19:13 +0000
commit: ccd992355df7192993c666236047820244914598 (patch)
tree: f00fea65147227b7743083c6148396f74cd66935 /src/unicode/utf8
parent: Initial commit. (diff)
download: golang-1.21-ccd992355df7192993c666236047820244914598.tar.xz
golang-1.21-ccd992355df7192993c666236047820244914598.zip
3 files changed, 1512 insertions, 0 deletions
diff --git a/src/unicode/utf8/example_test.go b/src/unicode/utf8/example_test.go
new file mode 100644
index 0000000..fe434c9
--- /dev/null
+++ b/src/unicode/utf8/example_test.go
@@ -0,0 +1,226 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf8_test
+
+import (
+	"fmt"
+	"unicode/utf8"
+)
+
+func ExampleDecodeLastRune() {
+	b := []byte("Hello, 世界")
+
+	for len(b) > 0 {
+		r, size := utf8.DecodeLastRune(b)
+		fmt.Printf("%c %v\n", r, size)
+
+		b = b[:len(b)-size]
+	}
+	// Output:
+	// 界 3
+	// 世 3
+	//   1
+	// , 1
+	// o 1
+	// l 1
+	// l 1
+	// e 1
+	// H 1
+}
+
+func ExampleDecodeLastRuneInString() {
+	str := "Hello, 世界"
+
+	for len(str) > 0 {
+		r, size := utf8.DecodeLastRuneInString(str)
+		fmt.Printf("%c %v\n", r, size)
+
+		str = str[:len(str)-size]
+	}
+	// Output:
+	// 界 3
+	// 世 3
+	//   1
+	// , 1
+	// o 1
+	// l 1
+	// l 1
+	// e 1
+	// H 1
+
+}
+
+func ExampleDecodeRune() {
+	b := []byte("Hello, 世界")
+
+	for len(b) > 0 {
+		r, size := utf8.DecodeRune(b)
+		fmt.Printf("%c %v\n", r, size)
+
+		b = b[size:]
+	}
+	// Output:
+	// H 1
+	// e 1
+	// l 1
+	// l 1
+	// o 1
+	// , 1
+	//   1
+	// 世 3
+	// 界 3
+}
+
+func ExampleDecodeRuneInString() {
+	str := "Hello, 世界"
+
+	for len(str) > 0 {
+		r, size := utf8.DecodeRuneInString(str)
+		fmt.Printf("%c %v\n", r, size)
+
+		str = str[size:]
+	}
+	// Output:
+	// H 1
+	// e 1
+	// l 1
+	// l 1
+	// o 1
+	// , 1
+	//   1
+	// 世 3
+	// 界 3
+}
+
+func ExampleEncodeRune() {
+	r := '世'
+	buf := make([]byte, 3)
+
+	n := utf8.EncodeRune(buf, r)
+
+	fmt.Println(buf)
+	fmt.Println(n)
+	// Output:
+	// [228 184 150]
+	// 3
+}
+
+func ExampleEncodeRune_outOfRange() {
+	runes := []rune{
+		// Less than 0, out of range.
+		-1,
+		// Greater than 0x10FFFF, out of range.
+		0x110000,
+		// The Unicode replacement character.
+		utf8.RuneError,
+	}
+	for i, c := range runes {
+		buf := make([]byte, 3)
+		size := utf8.EncodeRune(buf, c)
+		fmt.Printf("%d: %d %[2]s %d\n", i, buf, size)
+	}
+	// Output:
+	// 0: [239 191 189] � 3
+	// 1: [239 191 189] � 3
+	// 2: [239 191 189] � 3
+}
+
+func ExampleFullRune() {
+	buf := []byte{228, 184, 150} // 世
+	fmt.Println(utf8.FullRune(buf))
+	fmt.Println(utf8.FullRune(buf[:2]))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleFullRuneInString() {
+	str := "世"
+	fmt.Println(utf8.FullRuneInString(str))
+	fmt.Println(utf8.FullRuneInString(str[:2]))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleRuneCount() {
+	buf := []byte("Hello, 世界")
+	fmt.Println("bytes =", len(buf))
+	fmt.Println("runes =", utf8.RuneCount(buf))
+	// Output:
+	// bytes = 13
+	// runes = 9
+}
+
+func ExampleRuneCountInString() {
+	str := "Hello, 世界"
+	fmt.Println("bytes =", len(str))
+	fmt.Println("runes =", utf8.RuneCountInString(str))
+	// Output:
+	// bytes = 13
+	// runes = 9
+}
+
+func ExampleRuneLen() {
+	fmt.Println(utf8.RuneLen('a'))
+	fmt.Println(utf8.RuneLen('界'))
+	// Output:
+	// 1
+	// 3
+}
+
+func ExampleRuneStart() {
+	buf := []byte("a界")
+	fmt.Println(utf8.RuneStart(buf[0]))
+	fmt.Println(utf8.RuneStart(buf[1]))
+	fmt.Println(utf8.RuneStart(buf[2]))
+	// Output:
+	// true
+	// true
+	// false
+}
+
+func ExampleValid() {
+	valid := []byte("Hello, 世界")
+	invalid := []byte{0xff, 0xfe, 0xfd}
+
+	fmt.Println(utf8.Valid(valid))
+	fmt.Println(utf8.Valid(invalid))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleValidRune() {
+	valid := 'a'
+	invalid := rune(0xfffffff)
+
+	fmt.Println(utf8.ValidRune(valid))
+	fmt.Println(utf8.ValidRune(invalid))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleValidString() {
+	valid := "Hello, 世界"
+	invalid := string([]byte{0xff, 0xfe, 0xfd})
+
+	fmt.Println(utf8.ValidString(valid))
+	fmt.Println(utf8.ValidString(invalid))
+	// Output:
+	// true
+	// false
+}
+
+func ExampleAppendRune() {
+	buf1 := utf8.AppendRune(nil, 0x10000)
+	buf2 := utf8.AppendRune([]byte("init"), 0x10000)
+	fmt.Println(string(buf1))
+	fmt.Println(string(buf2))
+	// Output:
+	// 𐀀
+	// init𐀀
+}
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
new file mode 100644
index 0000000..1e9f666
--- /dev/null
+++ b/src/unicode/utf8/utf8.go
@@ -0,0 +1,583 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package utf8 implements functions and constants to support text encoded in
+// UTF-8. It includes functions to translate between runes and UTF-8 byte sequences.
+// See https://en.wikipedia.org/wiki/UTF-8
+package utf8
+
+// The conditions RuneError==unicode.ReplacementChar and
+// MaxRune==unicode.MaxRune are verified in the tests.
+// Defining them locally avoids this package depending on package unicode.
+
+// Numbers fundamental to the encoding.
+const (
+	RuneError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
+	RuneSelf  = 0x80         // characters below RuneSelf are represented as themselves in a single byte.
+	MaxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
+	UTFMax    = 4            // maximum number of bytes of a UTF-8 encoded Unicode character.
+)
+
+// Code points in the surrogate range are not valid for UTF-8.
+const (
+	surrogateMin = 0xD800
+	surrogateMax = 0xDFFF
+)
+
+const (
+	t1 = 0b00000000
+	tx = 0b10000000 // tag bits OR'd into every continuation byte
+	t2 = 0b11000000 // tag bits OR'd into the first byte of a 2-byte encoding
+	t3 = 0b11100000 // tag bits OR'd into the first byte of a 3-byte encoding
+	t4 = 0b11110000 // tag bits OR'd into the first byte of a 4-byte encoding
+	t5 = 0b11111000
+
+	maskx = 0b00111111 // payload bits of a continuation byte
+	mask2 = 0b00011111 // payload bits of the first byte of a 2-byte encoding
+	mask3 = 0b00001111 // payload bits of the first byte of a 3-byte encoding
+	mask4 = 0b00000111 // payload bits of the first byte of a 4-byte encoding
+
+	rune1Max = 1<<7 - 1  // largest rune encoded in 1 byte
+	rune2Max = 1<<11 - 1 // largest rune encoded in 2 bytes
+	rune3Max = 1<<16 - 1 // largest rune encoded in 3 bytes
+
+	// The default lowest and highest continuation byte.
+	locb = 0b10000000
+	hicb = 0b10111111
+
+	// The names of these constants are chosen to give nice alignment in the
+	// table below. The first nibble is an index into acceptRanges or F for
+	// special one-byte cases. The second nibble is the rune length or the
+	// status for the special one-byte case.
+	xx = 0xF1 // invalid: size 1
+	as = 0xF0 // ASCII: size 1
+	s1 = 0x02 // accept 0, size 2
+	s2 = 0x13 // accept 1, size 3
+	s3 = 0x03 // accept 0, size 3
+	s4 = 0x23 // accept 2, size 3
+	s5 = 0x34 // accept 3, size 4
+	s6 = 0x04 // accept 0, size 4
+	s7 = 0x44 // accept 4, size 4
+)
+
+// first classifies every possible first byte of a UTF-8 sequence using the as, xx and s1-s7 codes defined above.
+var first = [256]uint8{
+	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
+	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
+	xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
+	s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
+	s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
+	s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
+}
+
+// acceptRange gives the range of valid values for the second byte in a UTF-8
+// sequence.
+type acceptRange struct {
+	lo uint8 // lowest value for second byte.
+	hi uint8 // highest value for second byte.
+}
+
+// acceptRanges has size 16 to avoid bounds checks in the code that uses it: it is indexed by x>>4, where x is a uint8.
+var acceptRanges = [16]acceptRange{
+	0: {locb, hicb}, // default: any continuation byte (s1, s3, s6)
+	1: {0xA0, hicb}, // used by s2 (first byte 0xE0)
+	2: {locb, 0x9F}, // used by s4 (first byte 0xED)
+	3: {0x90, hicb}, // used by s5 (first byte 0xF0)
+	4: {locb, 0x8F}, // used by s7 (first byte 0xF4)
+}
+
+// FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune.
+// An invalid encoding is considered a full rune since it will convert as a width-1 error rune.
+func FullRune(p []byte) bool {
+	n := len(p)
+	if n == 0 {
+		return false // nothing to decode
+	}
+	x := first[p[0]] // class of the first byte; for multi-byte starters x&7 is the sequence length
+	if n >= int(x&7) {
+		return true // ASCII, invalid or valid.
+	}
+	// Must be short or invalid.
+	accept := acceptRanges[x>>4]
+	if n > 1 && (p[1] < accept.lo || accept.hi < p[1]) {
+		return true // second byte is out of range, so this is already a width-1 error
+	} else if n > 2 && (p[2] < locb || hicb < p[2]) {
+		return true // third byte is not a continuation byte: also a width-1 error
+	}
+	return false // the bytes seen so far are in range but the sequence is incomplete
+}
+
+// FullRuneInString is like FullRune but its input is a string.
+func FullRuneInString(s string) bool {
+	n := len(s)
+	if n == 0 {
+		return false // nothing to decode
+	}
+	x := first[s[0]] // class of the first byte; for multi-byte starters x&7 is the sequence length
+	if n >= int(x&7) {
+		return true // ASCII, invalid, or valid.
+	}
+	// Must be short or invalid.
+	accept := acceptRanges[x>>4]
+	if n > 1 && (s[1] < accept.lo || accept.hi < s[1]) {
+		return true // second byte is out of range, so this is already a width-1 error
+	} else if n > 2 && (s[2] < locb || hicb < s[2]) {
+		return true // third byte is not a continuation byte: also a width-1 error
+	}
+	return false // the bytes seen so far are in range but the sequence is incomplete
+}
+
+// DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and
+// its width in bytes. If p is empty it returns (RuneError, 0). Otherwise, if
+// the encoding is invalid, it returns (RuneError, 1). Both are impossible
+// results for correct, non-empty UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeRune(p []byte) (r rune, size int) {
+	n := len(p)
+	if n < 1 {
+		return RuneError, 0
+	}
+	p0 := p[0]
+	x := first[p0]
+	if x >= as {
+		// The following code simulates an additional check for x == xx and
+		// handling the ASCII and invalid cases accordingly. This mask-and-or
+		// approach prevents an additional branch.
+		mask := rune(x) << 31 >> 31 // All zero bits for as (0xF0), all one bits for xx (0xF1).
+		return rune(p[0])&^mask | RuneError&mask, 1
+	}
+	sz := int(x & 7)
+	accept := acceptRanges[x>>4]
+	if n < sz {
+		return RuneError, 1 // sequence is cut short
+	}
+	b1 := p[1]
+	if b1 < accept.lo || accept.hi < b1 {
+		return RuneError, 1 // second byte outside the range allowed for this first byte
+	}
+	if sz <= 2 { // <= instead of == to help the compiler eliminate some bounds checks
+		return rune(p0&mask2)<<6 | rune(b1&maskx), 2
+	}
+	b2 := p[2]
+	if b2 < locb || hicb < b2 {
+		return RuneError, 1 // third byte is not a continuation byte
+	}
+	if sz <= 3 {
+		return rune(p0&mask3)<<12 | rune(b1&maskx)<<6 | rune(b2&maskx), 3
+	}
+	b3 := p[3]
+	if b3 < locb || hicb < b3 {
+		return RuneError, 1 // fourth byte is not a continuation byte
+	}
+	return rune(p0&mask4)<<18 | rune(b1&maskx)<<12 | rune(b2&maskx)<<6 | rune(b3&maskx), 4
+}
+
+// DecodeRuneInString is like DecodeRune but its input is a string. If s is
+// empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it
+// returns (RuneError, 1). Both are impossible results for correct, non-empty
+// UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeRuneInString(s string) (r rune, size int) {
+	n := len(s)
+	if n < 1 {
+		return RuneError, 0
+	}
+	s0 := s[0]
+	x := first[s0]
+	if x >= as {
+		// The following code simulates an additional check for x == xx and
+		// handling the ASCII and invalid cases accordingly. This mask-and-or
+		// approach prevents an additional branch.
+		mask := rune(x) << 31 >> 31 // All zero bits for as (0xF0), all one bits for xx (0xF1).
+		return rune(s[0])&^mask | RuneError&mask, 1
+	}
+	sz := int(x & 7)
+	accept := acceptRanges[x>>4]
+	if n < sz {
+		return RuneError, 1 // sequence is cut short
+	}
+	s1 := s[1] // second byte; this local shadows the package-level constant s1
+	if s1 < accept.lo || accept.hi < s1 {
+		return RuneError, 1 // second byte outside the range allowed for this first byte
+	}
+	if sz <= 2 { // <= instead of == to help the compiler eliminate some bounds checks
+		return rune(s0&mask2)<<6 | rune(s1&maskx), 2
+	}
+	s2 := s[2] // third byte; shadows the constant s2
+	if s2 < locb || hicb < s2 {
+		return RuneError, 1 // third byte is not a continuation byte
+	}
+	if sz <= 3 {
+		return rune(s0&mask3)<<12 | rune(s1&maskx)<<6 | rune(s2&maskx), 3
+	}
+	s3 := s[3] // fourth byte; shadows the constant s3
+	if s3 < locb || hicb < s3 {
+		return RuneError, 1 // fourth byte is not a continuation byte
+	}
+	return rune(s0&mask4)<<18 | rune(s1&maskx)<<12 | rune(s2&maskx)<<6 | rune(s3&maskx), 4
+}
+
+// DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and
+// its width in bytes. If p is empty it returns (RuneError, 0). Otherwise, if
+// the encoding is invalid, it returns (RuneError, 1). Both are impossible
+// results for correct, non-empty UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeLastRune(p []byte) (r rune, size int) {
+	end := len(p)
+	if end == 0 {
+		return RuneError, 0
+	}
+	start := end - 1
+	r = rune(p[start])
+	if r < RuneSelf {
+		return r, 1 // last byte is ASCII and stands for itself
+	}
+	// Guard against O(n^2) behavior when traversing
+	// backwards through strings with long sequences of
+	// invalid UTF-8: never look back more than UTFMax bytes.
+	lim := end - UTFMax
+	if lim < 0 {
+		lim = 0
+	}
+	for start--; start >= lim; start-- {
+		if RuneStart(p[start]) {
+			break // found a byte that can begin an encoding
+		}
+	}
+	if start < 0 {
+		start = 0 // the scan ran off the front of p
+	}
+	r, size = DecodeRune(p[start:end])
+	if start+size != end {
+		return RuneError, 1 // the decoded rune does not end exactly at end
+	}
+	return r, size
+}
+
+// DecodeLastRuneInString is like DecodeLastRune but its input is a string. If
+// s is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid,
+// it returns (RuneError, 1). Both are impossible results for correct,
+// non-empty UTF-8.
+//
+// An encoding is invalid if it is incorrect UTF-8, encodes a rune that is
+// out of range, or is not the shortest possible UTF-8 encoding for the
+// value. No other validation is performed.
+func DecodeLastRuneInString(s string) (r rune, size int) {
+	end := len(s)
+	if end == 0 {
+		return RuneError, 0
+	}
+	start := end - 1
+	r = rune(s[start])
+	if r < RuneSelf {
+		return r, 1 // last byte is ASCII and stands for itself
+	}
+	// Guard against O(n^2) behavior when traversing
+	// backwards through strings with long sequences of
+	// invalid UTF-8: never look back more than UTFMax bytes.
+	lim := end - UTFMax
+	if lim < 0 {
+		lim = 0
+	}
+	for start--; start >= lim; start-- {
+		if RuneStart(s[start]) {
+			break // found a byte that can begin an encoding
+		}
+	}
+	if start < 0 {
+		start = 0 // the scan ran off the front of s
+	}
+	r, size = DecodeRuneInString(s[start:end])
+	if start+size != end {
+		return RuneError, 1 // the decoded rune does not end exactly at end
+	}
+	return r, size
+}
+
+// RuneLen returns the number of bytes required to encode the rune.
+// It returns -1 if the rune is not a valid value to encode in UTF-8.
+func RuneLen(r rune) int {
+	switch {
+	case r < 0:
+		return -1
+	case r <= rune1Max:
+		return 1
+	case r <= rune2Max:
+		return 2
+	case surrogateMin <= r && r <= surrogateMax: // must come before the rune3Max case, which would otherwise match surrogates
+		return -1
+	case r <= rune3Max:
+		return 3
+	case r <= MaxRune:
+		return 4
+	}
+	return -1 // r is above MaxRune
+}
+
+// EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune.
+// If the rune is out of range, it writes the encoding of RuneError.
+// It returns the number of bytes written.
+func EncodeRune(p []byte, r rune) int {
+	// Negative values are erroneous. As a uint32 they exceed MaxRune, so they take the RuneError case below.
+	switch i := uint32(r); {
+	case i <= rune1Max:
+		p[0] = byte(r)
+		return 1
+	case i <= rune2Max:
+		_ = p[1] // eliminate bounds checks
+		p[0] = t2 | byte(r>>6)
+		p[1] = tx | byte(r)&maskx
+		return 2
+	case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
+		r = RuneError // RuneError is within rune3Max, so fall through to the 3-byte encoding
+		fallthrough
+	case i <= rune3Max:
+		_ = p[2] // eliminate bounds checks
+		p[0] = t3 | byte(r>>12)
+		p[1] = tx | byte(r>>6)&maskx
+		p[2] = tx | byte(r)&maskx
+		return 3
+	default:
+		_ = p[3] // eliminate bounds checks
+		p[0] = t4 | byte(r>>18)
+		p[1] = tx | byte(r>>12)&maskx
+		p[2] = tx | byte(r>>6)&maskx
+		p[3] = tx | byte(r)&maskx
+		return 4
+	}
+}
+
+// AppendRune appends the UTF-8 encoding of r to the end of p and
+// returns the extended buffer. If the rune is out of range,
+// it appends the encoding of RuneError.
+func AppendRune(p []byte, r rune) []byte {
+	// This function is inlineable for fast handling of ASCII.
+	if uint32(r) <= rune1Max {
+		return append(p, byte(r))
+	}
+	return appendRuneNonASCII(p, r) // multi-byte and out-of-range runes take the slower path
+}
+
+func appendRuneNonASCII(p []byte, r rune) []byte {
+	// Negative values are erroneous. As a uint32 they exceed MaxRune, so they take the RuneError case below.
+	switch i := uint32(r); {
+	case i <= rune2Max:
+		return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
+	case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
+		r = RuneError // RuneError is within rune3Max, so fall through to the 3-byte encoding
+		fallthrough
+	case i <= rune3Max:
+		return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
+	default:
+		return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
+	}
+}
+
+// RuneCount returns the number of runes in p. Erroneous and short
+// encodings are treated as single runes of width 1 byte.
+func RuneCount(p []byte) int {
+	np := len(p)
+	var n int
+	for i := 0; i < np; {
+		n++ // every iteration consumes exactly one rune, valid or not
+		c := p[i]
+		if c < RuneSelf {
+			// ASCII fast path
+			i++
+			continue
+		}
+		x := first[c]
+		if x == xx {
+			i++ // invalid.
+			continue
+		}
+		size := int(x & 7)
+		if i+size > np {
+			i++ // Short or invalid.
+			continue
+		}
+		accept := acceptRanges[x>>4]
+		if c := p[i+1]; c < accept.lo || accept.hi < c {
+			size = 1 // bad second byte: count the first byte alone
+		} else if size == 2 {
+		} else if c := p[i+2]; c < locb || hicb < c {
+			size = 1 // bad third byte
+		} else if size == 3 {
+		} else if c := p[i+3]; c < locb || hicb < c {
+			size = 1 // bad fourth byte
+		}
+		i += size
+	}
+	return n
+}
+
+// RuneCountInString is like RuneCount but its input is a string.
+func RuneCountInString(s string) (n int) {
+	ns := len(s)
+	for i := 0; i < ns; n++ { // n++ runs after every iteration, including those that continue
+		c := s[i]
+		if c < RuneSelf {
+			// ASCII fast path
+			i++
+			continue
+		}
+		x := first[c]
+		if x == xx {
+			i++ // invalid.
+			continue
+		}
+		size := int(x & 7)
+		if i+size > ns {
+			i++ // Short or invalid.
+			continue
+		}
+		accept := acceptRanges[x>>4]
+		if c := s[i+1]; c < accept.lo || accept.hi < c {
+			size = 1 // bad second byte: count the first byte alone
+		} else if size == 2 {
+		} else if c := s[i+2]; c < locb || hicb < c {
+			size = 1 // bad third byte
+		} else if size == 3 {
+		} else if c := s[i+3]; c < locb || hicb < c {
+			size = 1 // bad fourth byte
+		}
+		i += size
+	}
+	return n
+}
+
+// RuneStart reports whether the byte could be the first byte of an encoded,
+// possibly invalid rune. Second and subsequent bytes always have the top two
+// bits set to 10.
+func RuneStart(b byte) bool { return b&0xC0 != 0x80 } // 0xC0 selects the top two bits; 0x80 is the 10 pattern
+
+// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
+func Valid(p []byte) bool {
+	// This optimization avoids the need to recompute the capacity
+	// when generating code for p[8:], bringing it to parity with
+	// ValidString, which was 20% faster on long ASCII strings.
+	p = p[:len(p):len(p)]
+
+	// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+	for len(p) >= 8 {
+		// Combining two 32 bit loads allows the same code to be used
+		// for 32 and 64 bit platforms.
+		// The compiler can generate a 32bit load for first32 and second32
+		// on many platforms. See test/codegen/memcombine.go.
+		first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+		second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
+		if (first32|second32)&0x80808080 != 0 {
+			// Found a non ASCII byte (>= RuneSelf).
+			break
+		}
+		p = p[8:]
+	}
+	n := len(p) // whatever the fast path did not consume is checked rune by rune
+	for i := 0; i < n; {
+		pi := p[i]
+		if pi < RuneSelf {
+			i++
+			continue
+		}
+		x := first[pi]
+		if x == xx {
+			return false // Illegal starter byte.
+		}
+		size := int(x & 7)
+		if i+size > n {
+			return false // Short or invalid.
+		}
+		accept := acceptRanges[x>>4]
+		if c := p[i+1]; c < accept.lo || accept.hi < c {
+			return false // second byte outside the range allowed for this first byte
+		} else if size == 2 {
+		} else if c := p[i+2]; c < locb || hicb < c {
+			return false // third byte is not a continuation byte
+		} else if size == 3 {
+		} else if c := p[i+3]; c < locb || hicb < c {
+			return false // fourth byte is not a continuation byte
+		}
+		i += size
+	}
+	return true
+}
+
+// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
+func ValidString(s string) bool {
+	// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+	for len(s) >= 8 {
+		// Combining two 32 bit loads allows the same code to be used
+		// for 32 and 64 bit platforms.
+		// The compiler can generate a 32bit load for first32 and second32
+		// on many platforms. See test/codegen/memcombine.go.
+		first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+		second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24
+		if (first32|second32)&0x80808080 != 0 {
+			// Found a non ASCII byte (>= RuneSelf).
+			break
+		}
+		s = s[8:]
+	}
+	n := len(s) // whatever the fast path did not consume is checked rune by rune
+	for i := 0; i < n; {
+		si := s[i]
+		if si < RuneSelf {
+			i++
+			continue
+		}
+		x := first[si]
+		if x == xx {
+			return false // Illegal starter byte.
+		}
+		size := int(x & 7)
+		if i+size > n {
+			return false // Short or invalid.
+		}
+		accept := acceptRanges[x>>4]
+		if c := s[i+1]; c < accept.lo || accept.hi < c {
+			return false // second byte outside the range allowed for this first byte
+		} else if size == 2 {
+		} else if c := s[i+2]; c < locb || hicb < c {
+			return false // third byte is not a continuation byte
+		} else if size == 3 {
+		} else if c := s[i+3]; c < locb || hicb < c {
+			return false // fourth byte is not a continuation byte
+		}
+		i += size
+	}
+	return true
+}
+
+// ValidRune reports whether r can be legally encoded as UTF-8.
+// Code points that are out of range or a surrogate half are illegal.
+func ValidRune(r rune) bool {
+	switch {
+	case 0 <= r && r < surrogateMin: // below the surrogate range
+		return true
+	case surrogateMax < r && r <= MaxRune: // above the surrogate range, up to MaxRune
+		return true
+	}
+	return false // negative, a surrogate, or above MaxRune
+}
diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go
new file mode 100644
index 0000000..19a04dc
--- /dev/null
+++ b/src/unicode/utf8/utf8_test.go
@@ -0,0 +1,703 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf8_test
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+	"unicode"
+	. "unicode/utf8"
+)
+
+// Validate the constants redefined from unicode.
+func init() {
+	if MaxRune != unicode.MaxRune {
+		panic("utf8.MaxRune is wrong")
+	}
+	if RuneError != unicode.ReplacementChar {
+		panic("utf8.RuneError is wrong")
+	}
+}
+
+// Validate the constants redefined from unicode.
+func TestConstants(t *testing.T) {
+	if MaxRune != unicode.MaxRune {
+		t.Errorf("utf8.MaxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
+	}
+	if RuneError != unicode.ReplacementChar {
+		t.Errorf("utf8.RuneError is wrong: %x should be %x", RuneError, unicode.ReplacementChar)
+	}
+}
+
+type Utf8Map struct {
+	r   rune
+	str string
+}
+
+var utf8map = []Utf8Map{
+	{0x0000, "\x00"},
+	{0x0001, "\x01"},
+	{0x007e, "\x7e"},
+	{0x007f, "\x7f"},
+	{0x0080, "\xc2\x80"},
+	{0x0081, "\xc2\x81"},
+	{0x00bf, "\xc2\xbf"},
+	{0x00c0, "\xc3\x80"},
+	{0x00c1, "\xc3\x81"},
+	{0x00c8, "\xc3\x88"},
+	{0x00d0, "\xc3\x90"},
+	{0x00e0, "\xc3\xa0"},
+	{0x00f0, "\xc3\xb0"},
+	{0x00f8, "\xc3\xb8"},
+	{0x00ff, "\xc3\xbf"},
+	{0x0100, "\xc4\x80"},
+	{0x07ff, "\xdf\xbf"},
+	{0x0400, "\xd0\x80"},
+	{0x0800, "\xe0\xa0\x80"},
+	{0x0801, "\xe0\xa0\x81"},
+	{0x1000, "\xe1\x80\x80"},
+	{0xd000, "\xed\x80\x80"},
+	{0xd7ff, "\xed\x9f\xbf"}, // last code point before surrogate half.
+	{0xe000, "\xee\x80\x80"}, // first code point after surrogate half.
+	{0xfffe, "\xef\xbf\xbe"},
+	{0xffff, "\xef\xbf\xbf"},
+	{0x10000, "\xf0\x90\x80\x80"},
+	{0x10001, "\xf0\x90\x80\x81"},
+	{0x40000, "\xf1\x80\x80\x80"},
+	{0x10fffe, "\xf4\x8f\xbf\xbe"},
+	{0x10ffff, "\xf4\x8f\xbf\xbf"},
+	{0xFFFD, "\xef\xbf\xbd"},
+}
+
+var surrogateMap = []Utf8Map{
+	{0xd800, "\xed\xa0\x80"}, // surrogate min decodes to (RuneError, 1)
+	{0xdfff, "\xed\xbf\xbf"}, // surrogate max decodes to (RuneError, 1)
+}
+
+var testStrings = []string{
+	"",
+	"abcd",
+	"☺☻☹",
+	"日a本b語ç日ð本Ê語þ日¥本¼語i日©",
+	"日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©日a本b語ç日ð本Ê語þ日¥本¼語i日©",
+	"\x80\x80\x80\x80",
+}
+
+func TestFullRune(t *testing.T) {
+	for _, m := range utf8map { // each entry pairs a rune with its complete encoding
+		b := []byte(m.str)
+		if !FullRune(b) {
+			t.Errorf("FullRune(%q) (%U) = false, want true", b, m.r)
+		}
+		s := m.str
+		if !FullRuneInString(s) {
+			t.Errorf("FullRuneInString(%q) (%U) = false, want true", s, m.r)
+		}
+		b1 := b[0 : len(b)-1] // drop the final byte: the encoding is now incomplete (or empty)
+		if FullRune(b1) {
+			t.Errorf("FullRune(%q) = true, want false", b1)
+		}
+		s1 := string(b1)
+		if FullRuneInString(s1) {
+			t.Errorf("FullRuneInString(%q) = true, want false", s1) // name the function actually under test
+		}
+	}
+	for _, s := range []string{"\xc0", "\xc1"} { // lone invalid first bytes still count as full (width-1 error) runes
+		b := []byte(s)
+		if !FullRune(b) {
+			t.Errorf("FullRune(%q) = false, want true", s)
+		}
+		if !FullRuneInString(s) {
+			t.Errorf("FullRuneInString(%q) = false, want true", s)
+		}
+	}
+}
+
+func TestEncodeRune(t *testing.T) {
+	for _, m := range utf8map {
+		b := []byte(m.str)
+		var buf [10]byte
+		n := EncodeRune(buf[0:], m.r)
+		b1 := buf[0:n]
+		if !bytes.Equal(b, b1) {
+			t.Errorf("EncodeRune(%#04x) = %q want %q", m.r, b1, b)
+		}
+	}
+}
+
+func TestAppendRune(t *testing.T) {
+	for _, m := range utf8map {
+		if buf := AppendRune(nil, m.r); string(buf) != m.str {
+			t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
+		}
+		if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
+			t.Errorf("AppendRune(init, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
+		}
+	}
+}
+
+func TestDecodeRune(t *testing.T) {
+	for _, m := range utf8map {
+		b := []byte(m.str)
+		r, size := DecodeRune(b)
+		if r != m.r || size != len(b) {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
+		}
+		s := m.str
+		r, size = DecodeRuneInString(s)
+		if r != m.r || size != len(b) {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
+		}
+
+		// the []byte conversion may have left spare capacity behind - make sure decoding ignores trailing bytes
+		r, size = DecodeRune(b[0:cap(b)])
+		if r != m.r || size != len(b) {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, m.r, len(b))
+		}
+		s = m.str + "\x00"
+		r, size = DecodeRuneInString(s)
+		if r != m.r || size != len(b) {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, m.r, len(b))
+		}
+
+		// make sure missing bytes fail
+		wantsize := 1
+		if wantsize >= len(b) {
+			wantsize = 0
+		}
+		r, size = DecodeRune(b[0 : len(b)-1])
+		if r != RuneError || size != wantsize {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[0:len(b)-1], r, size, RuneError, wantsize)
+		}
+		s = m.str[0 : len(m.str)-1]
+		r, size = DecodeRuneInString(s)
+		if r != RuneError || size != wantsize {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, wantsize)
+		}
+
+		// make sure bad sequences fail
+		if len(b) == 1 {
+			b[0] = 0x80
+		} else {
+			b[len(b)-1] = 0x7F
+		}
+		r, size = DecodeRune(b)
+		if r != RuneError || size != 1 {
+			t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, r, size, RuneError, 1)
+		}
+		s = string(b)
+		r, size = DecodeRuneInString(s)
+		if r != RuneError || size != 1 {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, r, size, RuneError, 1)
+		}
+
+	}
+}
+
+func TestDecodeSurrogateRune(t *testing.T) {
+	for _, m := range surrogateMap { // encodings of surrogate halves must be rejected
+		b := []byte(m.str)
+		r, size := DecodeRune(b)
+		if r != RuneError || size != 1 {
+			t.Errorf("DecodeRune(%q) = %x, %d want %x, %d", b, r, size, RuneError, 1)
+		}
+		s := m.str
+		r, size = DecodeRuneInString(s)
+		if r != RuneError || size != 1 {
+			t.Errorf("DecodeRuneInString(%q) = %x, %d want %x, %d", s, r, size, RuneError, 1) // report the string that was decoded
+		}
+	}
+}
+
+// Check that DecodeRune and DecodeLastRune correspond to
+// the equivalent range loop.
+func TestSequencing(t *testing.T) {
+	for _, ts := range testStrings {
+		for _, m := range utf8map {
+			for _, s := range []string{ts + m.str, m.str + ts, ts + m.str + ts} {
+				testSequence(t, s)
+			}
+		}
+	}
+}
+
+func runtimeRuneCount(s string) int {
+	return len([]rune(s)) // Replaced by gc with call to runtime.countrunes(s).
+}
+
+// Check that a range loop, len([]rune(string)) optimization and
+// []rune conversions visit the same runes.
+// Not really a test of this package, but the assumption is used here and
+// it's good to verify.
+func TestRuntimeConversion(t *testing.T) {
+	for _, ts := range testStrings {
+		count := RuneCountInString(ts)
+		if n := runtimeRuneCount(ts); n != count {
+			t.Errorf("%q: len([]rune()) counted %d runes; got %d from RuneCountInString", ts, n, count)
+			break
+		}
+
+		runes := []rune(ts)
+		if n := len(runes); n != count {
+			t.Errorf("%q: []rune() has length %d; got %d from RuneCountInString", ts, n, count)
+			break
+		}
+		i := 0
+		for _, r := range ts {
+			if r != runes[i] {
+				t.Errorf("%q[%d]: expected %c (%U); got %c (%U)", ts, i, runes[i], runes[i], r, r)
+			}
+			i++
+		}
+	}
+}
+
+var invalidSequenceTests = []string{
+	"\xed\xa0\x80\x80", // surrogate min
+	"\xed\xbf\xbf\x80", // surrogate max
+
+	// xx
+	"\x91\x80\x80\x80",
+
+	// s1
+	"\xC2\x7F\x80\x80",
+	"\xC2\xC0\x80\x80",
+	"\xDF\x7F\x80\x80",
+	"\xDF\xC0\x80\x80",
+
+	// s2
+	"\xE0\x9F\xBF\x80",
+	"\xE0\xA0\x7F\x80",
+	"\xE0\xBF\xC0\x80",
+	"\xE0\xC0\x80\x80",
+
+	// s3
+	"\xE1\x7F\xBF\x80",
+	"\xE1\x80\x7F\x80",
+	"\xE1\xBF\xC0\x80",
+	"\xE1\xC0\x80\x80",
+
+	// s4
+	"\xED\x7F\xBF\x80",
+	"\xED\x80\x7F\x80",
+	"\xED\x9F\xC0\x80",
+	"\xED\xA0\x80\x80",
+
+	// s5
+	"\xF0\x8F\xBF\xBF",
+	"\xF0\x90\x7F\xBF",
+	"\xF0\x90\x80\x7F",
+	"\xF0\xBF\xBF\xC0",
+	"\xF0\xBF\xC0\x80",
+	"\xF0\xC0\x80\x80",
+
+	// s6
+	"\xF1\x7F\xBF\xBF",
+	"\xF1\x80\x7F\xBF",
+	"\xF1\x80\x80\x7F",
+	"\xF1\xBF\xBF\xC0",
+	"\xF1\xBF\xC0\x80",
+	"\xF1\xC0\x80\x80",
+
+	// s7
+	"\xF4\x7F\xBF\xBF",
+	"\xF4\x80\x7F\xBF",
+	"\xF4\x80\x80\x7F",
+	"\xF4\x8F\xBF\xC0",
+	"\xF4\x8F\xC0\x80",
+	"\xF4\x90\x80\x80",
+}
+
+func runtimeDecodeRune(s string) rune {
+	for _, r := range s {
+		return r
+	}
+	return -1
+}
+
+func TestDecodeInvalidSequence(t *testing.T) {
+	for _, s := range invalidSequenceTests {
+		r1, _ := DecodeRune([]byte(s))
+		if want := RuneError; r1 != want {
+			t.Errorf("DecodeRune(%#x) = %#04x, want %#04x", s, r1, want)
+			return
+		}
+		r2, _ := DecodeRuneInString(s)
+		if want := RuneError; r2 != want {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s, r2, want)
+			return
+		}
+		if r1 != r2 {
+			t.Errorf("DecodeRune(%#x) = %#04x mismatch with DecodeRuneInString(%q) = %#04x", s, r1, s, r2)
+			return
+		}
+		r3 := runtimeDecodeRune(s)
+		if r2 != r3 {
+			t.Errorf("DecodeRuneInString(%q) = %#04x mismatch with runtime.decoderune(%q) = %#04x", s, r2, s, r3)
+			return
+		}
+	}
+}
+
+// testSequence walks s forwards and then backwards, checking that the
+// byte-slice and string decoders agree with ranging over s.
+//
+// The forward pass compares DecodeRune and DecodeRuneInString against each
+// (offset, rune) pair produced by range and records those pairs. The
+// backward pass then replays the recorded pairs in reverse and checks that
+// DecodeLastRune and DecodeLastRuneInString return the same runes and land
+// on the same offsets. The first mismatch is reported and ends the check.
+func testSequence(t *testing.T, s string) {
+	type step struct {
+		offset int
+		r      rune
+	}
+	raw := []byte(s)
+	steps := make([]step, 0, len(s))
+
+	// Forward pass: pos tracks where the decoders think the next rune starts.
+	pos := 0
+	for off, want := range s {
+		if pos != off {
+			t.Errorf("Sequence(%q) mismatched index %d, want %d", s, pos, off)
+			return
+		}
+		steps = append(steps, step{offset: off, r: want})
+
+		gotBytes, sizeBytes := DecodeRune(raw[off:])
+		if gotBytes != want {
+			t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[off:], gotBytes, want)
+			return
+		}
+		gotString, sizeString := DecodeRuneInString(s[off:])
+		if gotString != want {
+			t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[off:], gotString, want)
+			return
+		}
+		if sizeBytes != sizeString {
+			t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[off:], sizeBytes, sizeString)
+			return
+		}
+		pos += sizeBytes
+	}
+
+	// Backward pass: end is the exclusive upper bound of the prefix still to
+	// be decoded; k indexes the recorded step expected next.
+	end := len(s)
+	for k := len(steps) - 1; end > 0; k-- {
+		gotBytes, sizeBytes := DecodeLastRune(raw[:end])
+		gotString, sizeString := DecodeLastRuneInString(s[:end])
+		if sizeBytes != sizeString {
+			t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, end, sizeBytes, sizeString)
+			return
+		}
+
+		expect := steps[k]
+		if gotBytes != expect.r {
+			t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, end, gotBytes, expect.r)
+			return
+		}
+		if gotString != expect.r {
+			t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, end, gotString, expect.r)
+			return
+		}
+
+		end -= sizeBytes
+		if end != expect.offset {
+			t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, end, expect.offset)
+			return
+		}
+	}
+	if end != 0 {
+		t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, end)
+	}
+}
+
+// TestNegativeRune verifies that EncodeRune produces the same bytes for the
+// negative rune -1 as it does for RuneError (U+FFFD).
+func TestNegativeRune(t *testing.T) {
+	var wantBuf, gotBuf [UTFMax]byte
+	want := wantBuf[:EncodeRune(wantBuf[:], RuneError)]
+	got := gotBuf[:EncodeRune(gotBuf[:], -1)]
+	if bytes.Equal(got, want) {
+		return
+	}
+	t.Errorf("incorrect encoding [% x] for -1; expected [% x]", got, want)
+}
+
+// RuneCountTest is one TestRuneCount case: an input string and the rune
+// count expected for it.
+type RuneCountTest struct {
+	in  string // input string to count
+	out int    // expected number of runes
+}
+
+// runecounttests is the table driving TestRuneCount. Besides well-formed
+// text it contains strings with a lone or truncated multi-byte lead byte
+// (\xe2), together with the counts expected for them.
+var runecounttests = []RuneCountTest{
+	{in: "abcd", out: 4},
+	{in: "☺☻☹", out: 3},
+	{in: "1,2,3,4", out: 7},
+	{in: "\xe2\x00", out: 2},
+	{in: "\xe2\x80", out: 2},
+	{in: "a\xe2\x80", out: 3},
+}
+
+// TestRuneCount checks RuneCountInString and RuneCount against every entry
+// of runecounttests, reporting each mismatch without stopping.
+func TestRuneCount(t *testing.T) {
+	for i := range runecounttests {
+		input, want := runecounttests[i].in, runecounttests[i].out
+
+		got := RuneCountInString(input)
+		if got != want {
+			t.Errorf("RuneCountInString(%q) = %d, want %d", input, got, want)
+		}
+
+		got = RuneCount([]byte(input))
+		if got != want {
+			t.Errorf("RuneCount(%q) = %d, want %d", input, got, want)
+		}
+	}
+}
+
+// RuneLenTest is one TestRuneLen case: a rune and the value RuneLen is
+// expected to return for it.
+type RuneLenTest struct {
+	r    rune // rune passed to RuneLen
+	size int  // expected return value
+}
+
+// runelentests is the table driving TestRuneLen. The entries expecting -1
+// are the surrogate bounds 0xD800 and 0xDFFF, MaxRune+1 and a negative rune.
+var runelentests = []RuneLenTest{
+	{r: 0, size: 1},
+	{r: 'e', size: 1},
+	{r: 'é', size: 2},
+	{r: '☺', size: 3},
+	{r: RuneError, size: 3},
+	{r: MaxRune, size: 4},
+	{r: 0xD800, size: -1},
+	{r: 0xDFFF, size: -1},
+	{r: MaxRune + 1, size: -1},
+	{r: -1, size: -1},
+}
+
+// TestRuneLen checks RuneLen against every entry of runelentests.
+func TestRuneLen(t *testing.T) {
+	for _, tc := range runelentests {
+		got := RuneLen(tc.r)
+		if got == tc.size {
+			continue
+		}
+		t.Errorf("RuneLen(%#U) = %d, want %d", tc.r, got, tc.size)
+	}
+}
+
+// ValidTest is one TestValid case: an input string and whether Valid and
+// ValidString are expected to accept it.
+type ValidTest struct {
+	in  string // input to validate
+	out bool   // expected result
+}
+
+// validTests is the table driving TestValid. It mixes well-formed text with
+// truncated sequences, stray bytes, out-of-range values, an overlong
+// encoding and encoded surrogates.
+var validTests = []ValidTest{
+	{in: "", out: true},
+	{in: "a", out: true},
+	{in: "abc", out: true},
+	{in: "Ж", out: true},
+	{in: "ЖЖ", out: true},
+	{in: "брэд-ЛГТМ", out: true},
+	{in: "☺☻☹", out: true},
+	{in: "aa\xe2", out: false},
+	{in: string([]byte{66, 250}), out: false},
+	{in: string([]byte{66, 250, 67}), out: false},
+	{in: "a\uFFFDb", out: true},
+	{in: "\xF4\x8F\xBF\xBF", out: true},      // U+10FFFF
+	{in: "\xF4\x90\x80\x80", out: false},     // U+10FFFF+1; out of range
+	{in: "\xF7\xBF\xBF\xBF", out: false},     // 0x1FFFFF; out of range
+	{in: "\xFB\xBF\xBF\xBF\xBF", out: false}, // 0x3FFFFFF; out of range
+	{in: "\xc0\x80", out: false},             // U+0000 encoded in two bytes: incorrect
+	{in: "\xed\xa0\x80", out: false},         // U+D800 high surrogate (sic)
+	{in: "\xed\xbf\xbf", out: false},         // U+DFFF low surrogate (sic)
+}
+
+// TestValid checks Valid (on the bytes of the input) and ValidString against
+// every entry of validTests, reporting each mismatch without stopping.
+func TestValid(t *testing.T) {
+	for _, tc := range validTests {
+		if got := Valid([]byte(tc.in)); got != tc.out {
+			t.Errorf("Valid(%q) = %v; want %v", tc.in, got, tc.out)
+		}
+		if got := ValidString(tc.in); got != tc.out {
+			t.Errorf("ValidString(%q) = %v; want %v", tc.in, got, tc.out)
+		}
+	}
+}
+
+// ValidRuneTest is one TestValidRune case: a rune and the result expected
+// from ValidRune for it.
+type ValidRuneTest struct {
+	r  rune // rune passed to ValidRune
+	ok bool // expected result
+}
+
+// validrunetests is the table driving TestValidRune. It probes both sides of
+// the 0xD800-0xDFFF range as well as MaxRune, MaxRune+1 and a negative rune.
+var validrunetests = []ValidRuneTest{
+	{r: 0, ok: true},
+	{r: 'e', ok: true},
+	{r: 'é', ok: true},
+	{r: '☺', ok: true},
+	{r: RuneError, ok: true},
+	{r: MaxRune, ok: true},
+	{r: 0xD7FF, ok: true},
+	{r: 0xD800, ok: false},
+	{r: 0xDFFF, ok: false},
+	{r: 0xE000, ok: true},
+	{r: MaxRune + 1, ok: false},
+	{r: -1, ok: false},
+}
+
+// TestValidRune checks ValidRune against every entry of validrunetests.
+func TestValidRune(t *testing.T) {
+	for _, tc := range validrunetests {
+		got := ValidRune(tc.r)
+		if got == tc.ok {
+			continue
+		}
+		t.Errorf("ValidRune(%#U) = %t, want %t", tc.r, got, tc.ok)
+	}
+}
+
+// BenchmarkRuneCountTenASCIIChars measures RuneCount on a slice holding the
+// ten ASCII digits.
+func BenchmarkRuneCountTenASCIIChars(b *testing.B) {
+	digits := []byte("0123456789")
+	for n := 0; n < b.N; n++ {
+		RuneCount(digits)
+	}
+}
+
+// BenchmarkRuneCountTenJapaneseChars measures RuneCount on a slice holding
+// ten Japanese characters.
+func BenchmarkRuneCountTenJapaneseChars(b *testing.B) {
+	text := []byte("日本語日本語日本語日")
+	for n := 0; n < b.N; n++ {
+		RuneCount(text)
+	}
+}
+
+// BenchmarkRuneCountInStringTenASCIIChars measures RuneCountInString on the
+// ten ASCII digits.
+func BenchmarkRuneCountInStringTenASCIIChars(b *testing.B) {
+	const digits = "0123456789"
+	for n := 0; n < b.N; n++ {
+		RuneCountInString(digits)
+	}
+}
+
+// BenchmarkRuneCountInStringTenJapaneseChars measures RuneCountInString on a
+// string of ten Japanese characters.
+func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
+	const text = "日本語日本語日本語日"
+	for n := 0; n < b.N; n++ {
+		RuneCountInString(text)
+	}
+}
+
+// ascii100000 is the ten ASCII digits repeated 10000 times (100000 bytes);
+// it is the input of the 100K ASCII validation benchmarks.
+var ascii100000 = strings.Repeat("0123456789", 10000)
+
+// BenchmarkValidTenASCIIChars measures Valid on a slice holding the ten
+// ASCII digits.
+func BenchmarkValidTenASCIIChars(b *testing.B) {
+	digits := []byte("0123456789")
+	for n := 0; n < b.N; n++ {
+		Valid(digits)
+	}
+}
+
+// BenchmarkValid100KASCIIChars measures Valid on the bytes of ascii100000.
+// The conversion to []byte happens once, before the loop.
+func BenchmarkValid100KASCIIChars(b *testing.B) {
+	input := []byte(ascii100000)
+	for n := 0; n < b.N; n++ {
+		Valid(input)
+	}
+}
+
+// BenchmarkValidTenJapaneseChars measures Valid on a slice holding ten
+// Japanese characters.
+func BenchmarkValidTenJapaneseChars(b *testing.B) {
+	text := []byte("日本語日本語日本語日")
+	for n := 0; n < b.N; n++ {
+		Valid(text)
+	}
+}
+// BenchmarkValidLongMostlyASCII measures Valid on the bytes of
+// longStringMostlyASCII. The conversion to []byte happens once, before the
+// loop.
+func BenchmarkValidLongMostlyASCII(b *testing.B) {
+	input := []byte(longStringMostlyASCII)
+	for n := 0; n < b.N; n++ {
+		Valid(input)
+	}
+}
+
+// BenchmarkValidLongJapanese measures Valid on the bytes of
+// longStringJapanese. The conversion to []byte happens once, before the
+// loop.
+func BenchmarkValidLongJapanese(b *testing.B) {
+	input := []byte(longStringJapanese)
+	for n := 0; n < b.N; n++ {
+		Valid(input)
+	}
+}
+
+// BenchmarkValidStringTenASCIIChars measures ValidString on the ten ASCII
+// digits.
+func BenchmarkValidStringTenASCIIChars(b *testing.B) {
+	const digits = "0123456789"
+	for n := 0; n < b.N; n++ {
+		ValidString(digits)
+	}
+}
+
+// BenchmarkValidString100KASCIIChars measures ValidString on ascii100000.
+func BenchmarkValidString100KASCIIChars(b *testing.B) {
+	for n := 0; n < b.N; n++ {
+		ValidString(ascii100000)
+	}
+}
+
+// BenchmarkValidStringTenJapaneseChars measures ValidString on a string of
+// ten Japanese characters.
+func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
+	const text = "日本語日本語日本語日"
+	for n := 0; n < b.N; n++ {
+		ValidString(text)
+	}
+}
+
+// BenchmarkValidStringLongMostlyASCII measures ValidString on
+// longStringMostlyASCII.
+func BenchmarkValidStringLongMostlyASCII(b *testing.B) {
+	for n := 0; n < b.N; n++ {
+		ValidString(longStringMostlyASCII)
+	}
+}
+
+// BenchmarkValidStringLongJapanese measures ValidString on
+// longStringJapanese.
+func BenchmarkValidStringLongJapanese(b *testing.B) {
+	for n := 0; n < b.N; n++ {
+		ValidString(longStringJapanese)
+	}
+}
+
+// Long benchmark inputs; both are assigned in init.
+var (
+	longStringMostlyASCII string // ~100KB, ~97% ASCII
+	longStringJapanese    string // ~100KB, non-ASCII
+)
+
+// init builds the two long benchmark inputs.
+//
+// longStringJapanese is the ten-character Japanese sample repeated as many
+// whole times as fit in 100,000 bytes. longStringMostlyASCII is assembled
+// from chunks until it reaches at least 100,000 bytes: every hundredth chunk
+// (starting with the first) is the Japanese sample, all others are the ten
+// ASCII digits.
+func init() {
+	const (
+		targetLen = 100_000
+		japanese  = "日本語日本語日本語日"
+		digits    = "0123456789"
+	)
+
+	longStringJapanese = strings.Repeat(japanese, targetLen/len(japanese))
+
+	var sb strings.Builder
+	for chunk := 0; sb.Len() < targetLen; chunk++ {
+		piece := digits
+		if chunk%100 == 0 {
+			piece = japanese
+		}
+		sb.WriteString(piece)
+	}
+	longStringMostlyASCII = sb.String()
+}
+
+// BenchmarkEncodeASCIIRune measures EncodeRune writing 'a' into a reusable
+// UTFMax-byte buffer.
+func BenchmarkEncodeASCIIRune(b *testing.B) {
+	dst := make([]byte, UTFMax)
+	for n := 0; n < b.N; n++ {
+		EncodeRune(dst, 'a')
+	}
+}
+
+// BenchmarkEncodeJapaneseRune measures EncodeRune writing '本' into a
+// reusable UTFMax-byte buffer.
+func BenchmarkEncodeJapaneseRune(b *testing.B) {
+	dst := make([]byte, UTFMax)
+	for n := 0; n < b.N; n++ {
+		EncodeRune(dst, '本')
+	}
+}
+
+// BenchmarkAppendASCIIRune measures AppendRune appending 'a' to the empty
+// prefix of a UTFMax-byte buffer, so the buffer's storage is available on
+// every iteration.
+func BenchmarkAppendASCIIRune(b *testing.B) {
+	dst := make([]byte, UTFMax)
+	for n := 0; n < b.N; n++ {
+		AppendRune(dst[:0], 'a')
+	}
+}
+
+// BenchmarkAppendJapaneseRune measures AppendRune appending '本' to the
+// empty prefix of a UTFMax-byte buffer, so the buffer's storage is available
+// on every iteration.
+func BenchmarkAppendJapaneseRune(b *testing.B) {
+	dst := make([]byte, UTFMax)
+	for n := 0; n < b.N; n++ {
+		AppendRune(dst[:0], '本')
+	}
+}
+
+// BenchmarkDecodeASCIIRune measures DecodeRune on a one-byte slice holding
+// 'a'.
+func BenchmarkDecodeASCIIRune(b *testing.B) {
+	src := []byte{'a'}
+	for n := 0; n < b.N; n++ {
+		DecodeRune(src)
+	}
+}
+
+// BenchmarkDecodeJapaneseRune measures DecodeRune on the bytes of the single
+// character "本".
+func BenchmarkDecodeJapaneseRune(b *testing.B) {
+	src := []byte("本")
+	for n := 0; n < b.N; n++ {
+		DecodeRune(src)
+	}
+}
+
+// boolSink receives the boolean result of a benchmarked function so that
+// the result is referenced and the call is not removed by dead code
+// elimination.
+var boolSink bool
+
+// BenchmarkFullRune measures FullRune as three sub-benchmarks: a single
+// ASCII byte, the first three bytes of a four-byte sequence, and one
+// Japanese character. Each result is stored in boolSink so the call stays
+// referenced.
+func BenchmarkFullRune(b *testing.B) {
+	type input struct {
+		name string
+		data []byte
+	}
+	cases := []input{
+		{name: "ASCII", data: []byte("a")},
+		{name: "Incomplete", data: []byte("\xf0\x90\x80")},
+		{name: "Japanese", data: []byte("本")},
+	}
+	for _, tc := range cases {
+		b.Run(tc.name, func(sub *testing.B) {
+			for n := 0; n < sub.N; n++ {
+				boolSink = FullRune(tc.data)
+			}
+		})
+	}
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-16 19:19:13 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-16 19:19:13 +0000
commit	ccd992355df7192993c666236047820244914598 (patch)
tree	f00fea65147227b7743083c6148396f74cd66935 /src/unicode/utf8
parent	Initial commit. (diff)
download	golang-1.21-ccd992355df7192993c666236047820244914598.tar.xz golang-1.21-ccd992355df7192993c666236047820244914598.zip