Adding upstream version 1.17.13. (refs: upstream/1.17.13, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 13:15:26 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 13:15:26 +0000
commit: 82539ad8d59729fb45b0bb0edda8f2bddb719eb1 (patch)
tree: 58f0b58e6f44f0e04d4a6373132cf426fa835fa7 /src/internal
parent: Initial commit. (diff)
download: golang-1.17-82539ad8d59729fb45b0bb0edda8f2bddb719eb1.tar.xz
golang-1.17-82539ad8d59729fb45b0bb0edda8f2bddb719eb1.zip
313 files changed, 30139 insertions, 0 deletions
diff --git a/src/internal/abi/abi.go b/src/internal/abi/abi.go
new file mode 100644
index 0000000..aa5083a
--- /dev/null
+++ b/src/internal/abi/abi.go
@@ -0,0 +1,95 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package abi
+
+import "unsafe"
+
+// RegArgs is a struct that has space for each argument
+// and return value register on the current architecture.
+//
+// Assembly code knows the layout of the first two fields
+// of RegArgs.
+//
+// RegArgs also contains additional space to hold pointers
+// when it may not be safe to keep them only in the integer
+// register space otherwise.
+type RegArgs struct {
+	Ints   [IntArgRegs]uintptr  // untyped integer registers
+	Floats [FloatArgRegs]uint64 // untyped float registers
+
+	// Fields above this point are known to assembly.
+
+	// Ptrs is a space that duplicates Ints but with pointer type,
+	// used to make pointers passed or returned in registers
+	// visible to the GC by making the type unsafe.Pointer.
+	Ptrs [IntArgRegs]unsafe.Pointer
+
+	// ReturnIsPtr is a bitmap that indicates which registers
+	// contain or will contain pointers on the return path from
+	// a reflectcall. The i'th bit indicates whether the i'th
+	// register contains or will contain a valid Go pointer.
+	ReturnIsPtr IntArgRegBitmap
+}
+
+func (r *RegArgs) Dump() {
+	print("Ints:")
+	for _, x := range r.Ints {
+		print(" ", x)
+	}
+	println()
+	print("Floats:")
+	for _, x := range r.Floats {
+		print(" ", x)
+	}
+	println()
+	print("Ptrs:")
+	for _, x := range r.Ptrs {
+		print(" ", x)
+	}
+	println()
+}
+
+// IntArgRegBitmap is a bitmap large enough to hold one bit per
+// integer argument/return register.
+type IntArgRegBitmap [(IntArgRegs + 7) / 8]uint8
+
+// Set sets the i'th bit of the bitmap to 1.
+func (b *IntArgRegBitmap) Set(i int) {
+	b[i/8] |= uint8(1) << (i % 8)
+}
+
+// Get returns whether the i'th bit of the bitmap is set.
+//
+// nosplit because it's called in extremely sensitive contexts, like
+// on the reflectcall return path.
+//
+//go:nosplit
+func (b *IntArgRegBitmap) Get(i int) bool {
+	return b[i/8]&(uint8(1)<<(i%8)) != 0
+}
+
+// FuncPC* intrinsics.
+//
+// CAREFUL: In programs with plugins, FuncPC* can return different values
+// for the same function (because there are actually multiple copies of
+// the same function in the address space). To be safe, don't use the
+// results of these functions in any == expression. It is only safe to
+// use the result as an address at which to start executing code.
+
+// FuncPCABI0 returns the entry PC of the function f, which must be a
+// direct reference of a function defined as ABI0. Otherwise it is a
+// compile-time error.
+//
+// Implemented as a compile intrinsic.
+func FuncPCABI0(f interface{}) uintptr
+
+// FuncPCABIInternal returns the entry PC of the function f. If f is a
+// direct reference of a function, it must be defined as ABIInternal.
+// Otherwise it is a compile-time error. If f is not a direct reference
+// of a defined function, it assumes that f is a func value. Otherwise
+// the behavior is undefined.
+//
+// Implemented as a compile intrinsic.
+func FuncPCABIInternal(f interface{}) uintptr
diff --git a/src/internal/abi/abi_amd64.go b/src/internal/abi/abi_amd64.go
new file mode 100644
index 0000000..aff71f6
--- /dev/null
+++ b/src/internal/abi/abi_amd64.go
@@ -0,0 +1,21 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.regabireflect
+// +build goexperiment.regabireflect
+
+package abi
+
+const (
+	// See abi_generic.go.
+
+	// RAX, RBX, RCX, RDI, RSI, R8, R9, R10, R11.
+	IntArgRegs = 9
+
+	// X0 -> X14.
+	FloatArgRegs = 15
+
+	// We use SSE2 registers which support 64-bit float operations.
+	EffectiveFloatRegSize = 8
+)
diff --git a/src/internal/abi/abi_generic.go b/src/internal/abi/abi_generic.go
new file mode 100644
index 0000000..69400f9
--- /dev/null
+++ b/src/internal/abi/abi_generic.go
@@ -0,0 +1,39 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !goexperiment.regabireflect
+// +build !goexperiment.regabireflect
+
+package abi
+
+const (
+	// ABI-related constants.
+	//
+	// In the generic case, these are all zero
+	// which lets them gracefully degrade to ABI0.
+
+	// IntArgRegs is the number of registers dedicated
+	// to passing integer argument values. Result registers are identical
+	// to argument registers, so this number is used for those too.
+	IntArgRegs = 0
+
+	// FloatArgRegs is the number of registers dedicated
+	// to passing floating-point argument values. Result registers are
+	// identical to argument registers, so this number is used for
+	// those too.
+	FloatArgRegs = 0
+
+	// EffectiveFloatRegSize describes the width of floating point
+	// registers on the current platform from the ABI's perspective.
+	//
+	// Since Go only supports 32-bit and 64-bit floating point primitives,
+	// this number should be either 0, 4, or 8. 0 indicates no floating
+	// point registers for the ABI or that floating point values will be
+	// passed via the softfloat ABI.
+	//
+	// For platforms that support larger floating point register widths,
+	// such as x87's 80-bit "registers" (not that we support x87 currently),
+	// use 8.
+	EffectiveFloatRegSize = 0
+)
diff --git a/src/internal/abi/abi_test.go b/src/internal/abi/abi_test.go
new file mode 100644
index 0000000..5a3b6b6
--- /dev/null
+++ b/src/internal/abi/abi_test.go
@@ -0,0 +1,76 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package abi_test
+
+import (
+	"internal/abi"
+	"internal/testenv"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestFuncPC(t *testing.T) {
+	// Test that FuncPC* can get correct function PC.
+	pcFromAsm := abi.FuncPCTestFnAddr
+
+	// Test FuncPC for locally defined function
+	pcFromGo := abi.FuncPCTest()
+	if pcFromGo != pcFromAsm {
+		t.Errorf("FuncPC returns wrong PC, want %x, got %x", pcFromAsm, pcFromGo)
+	}
+
+	// Test FuncPC for imported function
+	pcFromGo = abi.FuncPCABI0(abi.FuncPCTestFn)
+	if pcFromGo != pcFromAsm {
+		t.Errorf("FuncPC returns wrong PC, want %x, got %x", pcFromAsm, pcFromGo)
+	}
+}
+
+func TestFuncPCCompileError(t *testing.T) {
+	// Test that FuncPC* on a function of a mismatched ABI is rejected.
+	testenv.MustHaveGoBuild(t)
+
+	// We want to test an internal package, which we cannot normally import,
+	// so run the assembler and compiler manually on the testdata sources.
+	tmpdir := t.TempDir()
+	asmSrc := filepath.Join("testdata", "x.s")
+	goSrc := filepath.Join("testdata", "x.go")
+	symabi := filepath.Join(tmpdir, "symabi")
+	obj := filepath.Join(tmpdir, "x.o")
+
+	// Generate the symabis file from the assembly source.
+	cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-gensymabis", "-o", symabi, asmSrc)
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Fatalf("go tool asm -gensymabis failed: %v\n%s", err, out)
+	}
+
+	// Compile the Go source against it; the compilation is expected to fail.
+	cmd = exec.Command(testenv.GoToolPath(t), "tool", "compile", "-symabis", symabi, "-o", obj, goSrc)
+	out, err = cmd.CombinedOutput()
+	if err == nil {
+		t.Fatalf("go tool compile did not fail")
+	}
+
+	// Expect errors in line 17, 18, 20, no errors on other lines.
+	want := []string{"x.go:17", "x.go:18", "x.go:20"}
+	got := strings.Split(string(out), "\n")
+	if got[len(got)-1] == "" {
+		got = got[:len(got)-1] // remove last empty line
+	}
+	for i, w := range want { // range over want: got may be longer or shorter
+		if i >= len(got) || !strings.Contains(got[i], w) {
+			t.Errorf("did not error on line %s", w)
+		}
+	}
+	if len(got) != len(want) {
+		t.Errorf("unexpected number of errors, want %d, got %d", len(want), len(got))
+	}
+	if t.Failed() {
+		t.Logf("output:\n%s", string(out))
+	}
+}
diff --git a/src/internal/abi/abi_test.s b/src/internal/abi/abi_test.s
new file mode 100644
index 0000000..93ace3e
--- /dev/null
+++ b/src/internal/abi/abi_test.s
@@ -0,0 +1,27 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+#ifdef GOARCH_386
+#define PTRSIZE 4
+#endif
+#ifdef GOARCH_arm
+#define PTRSIZE 4
+#endif
+#ifdef GOARCH_mips
+#define PTRSIZE 4
+#endif
+#ifdef GOARCH_mipsle
+#define PTRSIZE 4
+#endif
+#ifndef PTRSIZE
+#define PTRSIZE 8
+#endif
+
+TEXT	internal∕abi·FuncPCTestFn(SB),NOSPLIT,$0-0
+	RET
+
+GLOBL	internal∕abi·FuncPCTestFnAddr(SB), NOPTR, $PTRSIZE
+DATA	internal∕abi·FuncPCTestFnAddr(SB)/PTRSIZE, $internal∕abi·FuncPCTestFn(SB)
diff --git a/src/internal/abi/export_test.go b/src/internal/abi/export_test.go
new file mode 100644
index 0000000..2a87e9d
--- /dev/null
+++ b/src/internal/abi/export_test.go
@@ -0,0 +1,14 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package abi
+
+func FuncPCTestFn()
+
+var FuncPCTestFnAddr uintptr // address of FuncPCTestFn, directly retrieved from assembly
+
+//go:noinline
+func FuncPCTest() uintptr {
+	return FuncPCABI0(FuncPCTestFn)
+}
diff --git a/src/internal/abi/testdata/x.go b/src/internal/abi/testdata/x.go
new file mode 100644
index 0000000..cae103d
--- /dev/null
+++ b/src/internal/abi/testdata/x.go
@@ -0,0 +1,22 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x
+
+import "internal/abi"
+
+func Fn0() // defined in assembly
+
+func Fn1() {}
+
+var FnExpr func()
+
+func test() {
+	_ = abi.FuncPCABI0(Fn0)           // line 16, no error
+	_ = abi.FuncPCABIInternal(Fn0)    // line 17, error
+	_ = abi.FuncPCABI0(Fn1)           // line 18, error
+	_ = abi.FuncPCABIInternal(Fn1)    // line 19, no error
+	_ = abi.FuncPCABI0(FnExpr)        // line 20, error
+	_ = abi.FuncPCABIInternal(FnExpr) // line 21, no error
+}
diff --git a/src/internal/abi/testdata/x.s b/src/internal/abi/testdata/x.s
new file mode 100644
index 0000000..63c1385
--- /dev/null
+++ b/src/internal/abi/testdata/x.s
@@ -0,0 +1,6 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+TEXT	·Fn0(SB), 0, $0-0
+	RET
diff --git a/src/internal/buildcfg/cfg.go b/src/internal/buildcfg/cfg.go
new file mode 100644
index 0000000..9fe7f21
--- /dev/null
+++ b/src/internal/buildcfg/cfg.go
@@ -0,0 +1,136 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package buildcfg provides access to the build configuration
+// described by the current environment. It is for use by build tools
+// such as cmd/go or cmd/compile and for setting up go/build's Default context.
+//
+// Note that it does NOT provide access to the build configuration used to
+// build the currently-running binary. For that, use runtime.GOOS etc
+// as well as internal/goexperiment.
+package buildcfg
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+var (
+	defaultGOROOT string // set by linker
+
+	GOROOT   = envOr("GOROOT", defaultGOROOT)
+	GOARCH   = envOr("GOARCH", defaultGOARCH)
+	GOOS     = envOr("GOOS", defaultGOOS)
+	GO386    = envOr("GO386", defaultGO386)
+	GOARM    = goarm()
+	GOMIPS   = gomips()
+	GOMIPS64 = gomips64()
+	GOPPC64  = goppc64()
+	GOWASM   = gowasm()
+	GO_LDSO  = defaultGO_LDSO
+	Version  = version
+)
+
+// Error is one of the errors found (if any) in the build configuration.
+var Error error
+
+// Check exits the program with a fatal error if Error is non-nil.
+func Check() {
+	if Error != nil {
+		fmt.Fprintf(os.Stderr, "%s: %v\n", filepath.Base(os.Args[0]), Error)
+		os.Exit(2)
+	}
+}
+
+func envOr(key, value string) string {
+	if x := os.Getenv(key); x != "" {
+		return x
+	}
+	return value
+}
+
+func goarm() int {
+	def := defaultGOARM
+	if GOOS == "android" && GOARCH == "arm" {
+		// Android arm devices always support GOARM=7.
+		def = "7"
+	}
+	switch v := envOr("GOARM", def); v {
+	case "5":
+		return 5
+	case "6":
+		return 6
+	case "7":
+		return 7
+	}
+	Error = fmt.Errorf("invalid GOARM: must be 5, 6, 7")
+	return int(def[0] - '0')
+}
+
+func gomips() string {
+	switch v := envOr("GOMIPS", defaultGOMIPS); v {
+	case "hardfloat", "softfloat":
+		return v
+	}
+	Error = fmt.Errorf("invalid GOMIPS: must be hardfloat, softfloat")
+	return defaultGOMIPS
+}
+
+func gomips64() string {
+	switch v := envOr("GOMIPS64", defaultGOMIPS64); v {
+	case "hardfloat", "softfloat":
+		return v
+	}
+	Error = fmt.Errorf("invalid GOMIPS64: must be hardfloat, softfloat")
+	return defaultGOMIPS64
+}
+
+func goppc64() int {
+	switch v := envOr("GOPPC64", defaultGOPPC64); v {
+	case "power8":
+		return 8
+	case "power9":
+		return 9
+	}
+	Error = fmt.Errorf("invalid GOPPC64: must be power8, power9")
+	return int(defaultGOPPC64[len("power")] - '0')
+}
+
+type gowasmFeatures struct {
+	SignExt bool
+	SatConv bool
+}
+
+func (f gowasmFeatures) String() string {
+	var flags []string
+	if f.SatConv {
+		flags = append(flags, "satconv")
+	}
+	if f.SignExt {
+		flags = append(flags, "signext")
+	}
+	return strings.Join(flags, ",")
+}
+
+func gowasm() (f gowasmFeatures) {
+	for _, opt := range strings.Split(envOr("GOWASM", ""), ",") {
+		switch opt {
+		case "satconv":
+			f.SatConv = true
+		case "signext":
+			f.SignExt = true
+		case "":
+			// ignore
+		default:
+			Error = fmt.Errorf("invalid GOWASM: no such feature %q", opt)
+		}
+	}
+	return
+}
+
+func Getgoextlinkenabled() string {
+	return envOr("GO_EXTLINK_ENABLED", defaultGO_EXTLINK_ENABLED)
+}
diff --git a/src/internal/buildcfg/exp.go b/src/internal/buildcfg/exp.go
new file mode 100644
index 0000000..9a60253
--- /dev/null
+++ b/src/internal/buildcfg/exp.go
@@ -0,0 +1,189 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package buildcfg
+
+import (
+	"fmt"
+	"reflect"
+	"strings"
+
+	"internal/goexperiment"
+)
+
+// Experiment contains the toolchain experiments enabled for the
+// current build.
+//
+// (This is not necessarily the set of experiments the compiler itself
+// was built with.)
+//
+// experimentBaseline specifies the experiment flags that are enabled by
+// default in the current toolchain. This is, in effect, the "control"
+// configuration and any variation from this is an experiment.
+var Experiment, experimentBaseline = func() (goexperiment.Flags, goexperiment.Flags) {
+	flags, baseline, err := ParseGOEXPERIMENT(GOOS, GOARCH, envOr("GOEXPERIMENT", defaultGOEXPERIMENT))
+	if err != nil {
+		Error = err
+	}
+	return flags, baseline
+}()
+
+const DefaultGOEXPERIMENT = defaultGOEXPERIMENT
+
+// FramePointerEnabled enables the use of platform conventions for
+// saving frame pointers.
+//
+// This used to be an experiment, but now it's always enabled on
+// platforms that support it.
+//
+// Note: must agree with runtime.framepointer_enabled.
+var FramePointerEnabled = GOARCH == "amd64" || GOARCH == "arm64"
+
+// ParseGOEXPERIMENT parses a (GOOS, GOARCH, GOEXPERIMENT)
+// configuration tuple and returns the enabled and baseline experiment
+// flag sets.
+//
+// TODO(mdempsky): Move to internal/goexperiment.
+func ParseGOEXPERIMENT(goos, goarch, goexp string) (flags, baseline goexperiment.Flags, err error) {
+	regabiSupported := goarch == "amd64" && (goos == "android" || goos == "linux" || goos == "darwin" || goos == "windows")
+
+	baseline = goexperiment.Flags{
+		RegabiWrappers: regabiSupported,
+		RegabiG:        regabiSupported,
+		RegabiReflect:  regabiSupported,
+		RegabiDefer:    regabiSupported,
+		RegabiArgs:     regabiSupported,
+	}
+
+	// Start with the statically enabled set of experiments.
+	flags = baseline
+
+	// Pick up any changes to the baseline configuration from the
+	// GOEXPERIMENT environment. This can be set at make.bash time
+	// and overridden at build time.
+	if goexp != "" {
+		// Create a map of known experiment names.
+		names := make(map[string]func(bool))
+		rv := reflect.ValueOf(&flags).Elem()
+		rt := rv.Type()
+		for i := 0; i < rt.NumField(); i++ {
+			field := rv.Field(i)
+			names[strings.ToLower(rt.Field(i).Name)] = field.SetBool
+		}
+
+		// "regabi" is an alias for all working regabi
+		// subexperiments, and not an experiment itself. Doing
+		// this as an alias makes both "regabi" and "noregabi"
+		// do the right thing.
+		names["regabi"] = func(v bool) {
+			flags.RegabiWrappers = v
+			flags.RegabiG = v
+			flags.RegabiReflect = v
+			flags.RegabiDefer = v
+			flags.RegabiArgs = v
+		}
+
+		// Parse names.
+		for _, f := range strings.Split(goexp, ",") {
+			if f == "" {
+				continue
+			}
+			if f == "none" {
+				// GOEXPERIMENT=none disables all experiment flags.
+				// This is used by cmd/dist, which doesn't know how
+				// to build with any experiment flags.
+				flags = goexperiment.Flags{}
+				continue
+			}
+			val := true
+			if strings.HasPrefix(f, "no") {
+				f, val = f[2:], false
+			}
+			set, ok := names[f]
+			if !ok {
+				err = fmt.Errorf("unknown GOEXPERIMENT %s", f)
+				return
+			}
+			set(val)
+		}
+	}
+
+	// regabi is only supported on amd64.
+	if goarch != "amd64" {
+		flags.RegabiWrappers = false
+		flags.RegabiG = false
+		flags.RegabiReflect = false
+		flags.RegabiDefer = false
+		flags.RegabiArgs = false
+	}
+	// Check regabi dependencies.
+	if flags.RegabiG && !flags.RegabiWrappers {
+		err = fmt.Errorf("GOEXPERIMENT regabig requires regabiwrappers")
+	}
+	if flags.RegabiArgs && !(flags.RegabiWrappers && flags.RegabiG && flags.RegabiReflect && flags.RegabiDefer) {
+		err = fmt.Errorf("GOEXPERIMENT regabiargs requires regabiwrappers,regabig,regabireflect,regabidefer")
+	}
+	return
+}
+
+// expList returns the list of lower-cased experiment names for
+// experiments that differ from base. base may be nil to indicate no
+// experiments. If all is true, then include all experiment flags,
+// regardless of base.
+func expList(exp, base *goexperiment.Flags, all bool) []string {
+	var list []string
+	rv := reflect.ValueOf(exp).Elem()
+	var rBase reflect.Value
+	if base != nil {
+		rBase = reflect.ValueOf(base).Elem()
+	}
+	rt := rv.Type()
+	for i := 0; i < rt.NumField(); i++ {
+		name := strings.ToLower(rt.Field(i).Name)
+		val := rv.Field(i).Bool()
+		baseVal := false
+		if base != nil {
+			baseVal = rBase.Field(i).Bool()
+		}
+		if all || val != baseVal {
+			if val {
+				list = append(list, name)
+			} else {
+				list = append(list, "no"+name)
+			}
+		}
+	}
+	return list
+}
+
+// GOEXPERIMENT is a comma-separated list of enabled or disabled
+// experiments that differ from the baseline experiment configuration.
+// GOEXPERIMENT is exactly what a user would set on the command line
+// to get the set of enabled experiments.
+func GOEXPERIMENT() string {
+	return strings.Join(expList(&Experiment, &experimentBaseline, false), ",")
+}
+
+// EnabledExperiments returns a list of enabled experiments, as
+// lower-cased experiment names.
+func EnabledExperiments() []string {
+	return expList(&Experiment, nil, false)
+}
+
+// AllExperiments returns a list of all experiment settings.
+// Disabled experiments appear in the list prefixed by "no".
+func AllExperiments() []string {
+	return expList(&Experiment, nil, true)
+}
+
+// UpdateExperiments recomputes Experiment and its baseline from a new
+// (goos, goarch, goexperiment) tuple, recording any parse error in Error.
+// Only cmd/go needs this: "go env -w" can change GOARCH after program startup.
+func UpdateExperiments(goos, goarch, goexperiment string) {
+	var err error
+	Experiment, experimentBaseline, err = ParseGOEXPERIMENT(goos, goarch, goexperiment)
+	if err != nil {
+		Error = err
+	}
+}
diff --git a/src/internal/bytealg/bytealg.go b/src/internal/bytealg/bytealg.go
new file mode 100644
index 0000000..6b2b540
--- /dev/null
+++ b/src/internal/bytealg/bytealg.go
@@ -0,0 +1,151 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+import (
+	"internal/cpu"
+	"unsafe"
+)
+
+// Offsets into internal/cpu records for use in assembly.
+const (
+	offsetX86HasSSE2   = unsafe.Offsetof(cpu.X86.HasSSE2)
+	offsetX86HasSSE42  = unsafe.Offsetof(cpu.X86.HasSSE42)
+	offsetX86HasAVX2   = unsafe.Offsetof(cpu.X86.HasAVX2)
+	offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
+
+	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
+
+	offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9)
+)
+
+// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
+// If MaxLen is not 0, make sure MaxLen >= 4.
+var MaxLen int
+
+// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
+// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
+// three of them without causing allocation?
+
+// PrimeRK is the prime base used in Rabin-Karp algorithm.
+const PrimeRK = 16777619
+
+// HashStrBytes returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func HashStrBytes(sep []byte) (uint32, uint32) {
+	hash := uint32(0)
+	for i := 0; i < len(sep); i++ {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// HashStr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func HashStr(sep string) (uint32, uint32) {
+	hash := uint32(0)
+	for i := 0; i < len(sep); i++ {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// HashStrRevBytes returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func HashStrRevBytes(sep []byte) (uint32, uint32) {
+	hash := uint32(0)
+	for i := len(sep) - 1; i >= 0; i-- {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// HashStrRev returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func HashStrRev(sep string) (uint32, uint32) {
+	hash := uint32(0)
+	for i := len(sep) - 1; i >= 0; i-- {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
+// first occurrence of sep in s, or -1 if not present.
+func IndexRabinKarpBytes(s, sep []byte) int {
+	// Rabin-Karp search
+	hashsep, pow := HashStrBytes(sep)
+	n := len(sep)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*PrimeRK + uint32(s[i])
+	}
+	if h == hashsep && Equal(s[:n], sep) {
+		return 0
+	}
+	for i := n; i < len(s); {
+		h *= PrimeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashsep && Equal(s[i-n:i], sep) {
+			return i - n
+		}
+	}
+	return -1
+}
+
+// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
+// first occurrence of substr in s, or -1 if not present.
+func IndexRabinKarp(s, substr string) int {
+	// Rabin-Karp search
+	hashss, pow := HashStr(substr)
+	n := len(substr)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*PrimeRK + uint32(s[i])
+	}
+	if h == hashss && s[:n] == substr {
+		return 0
+	}
+	for i := n; i < len(s); {
+		h *= PrimeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashss && s[i-n:i] == substr {
+			return i - n
+		}
+	}
+	return -1
+}
diff --git a/src/internal/bytealg/compare_386.s b/src/internal/bytealg/compare_386.s
new file mode 100644
index 0000000..0981983
--- /dev/null
+++ b/src/internal/bytealg/compare_386.s
@@ -0,0 +1,143 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT,$0-28
+	MOVL	a_base+0(FP), SI
+	MOVL	a_len+4(FP), BX
+	MOVL	b_base+12(FP), DI
+	MOVL	b_len+16(FP), DX
+	LEAL	ret+24(FP), AX
+	JMP	cmpbody<>(SB)
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
+	MOVL	a_base+0(FP), SI
+	MOVL	a_len+4(FP), BX
+	MOVL	b_base+8(FP), DI
+	MOVL	b_len+12(FP), DX
+	LEAL	ret+16(FP), AX
+	JMP	cmpbody<>(SB)
+
+// input:
+//   SI = a
+//   DI = b
+//   BX = alen
+//   DX = blen
+//   AX = address of return word (set to 1/0/-1)
+TEXT cmpbody<>(SB),NOSPLIT,$0-0
+	MOVL	DX, BP
+	SUBL	BX, DX // DX = blen-alen
+	JLE	2(PC)
+	MOVL	BX, BP // BP = min(alen, blen)
+	CMPL	SI, DI
+	JEQ	allsame
+	CMPL	BP, $4
+	JB	small
+	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
+	JNE	mediumloop
+largeloop:
+	CMPL	BP, $16
+	JB	mediumloop
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, BX
+	XORL	$0xffff, BX	// convert EQ to NE
+	JNE	diff16	// branch if at least one byte is not equal
+	ADDL	$16, SI
+	ADDL	$16, DI
+	SUBL	$16, BP
+	JMP	largeloop
+
+diff16:
+	BSFL	BX, BX	// index of first byte that differs
+	XORL	DX, DX
+	MOVB	(SI)(BX*1), CX
+	CMPB	CX, (DI)(BX*1)
+	SETHI	DX
+	LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
+	MOVL	DX, (AX)
+	RET
+
+mediumloop:
+	CMPL	BP, $4
+	JBE	_0through4
+	MOVL	(SI), BX
+	MOVL	(DI), CX
+	CMPL	BX, CX
+	JNE	diff4
+	ADDL	$4, SI
+	ADDL	$4, DI
+	SUBL	$4, BP
+	JMP	mediumloop
+
+_0through4:
+	MOVL	-4(SI)(BP*1), BX
+	MOVL	-4(DI)(BP*1), CX
+	CMPL	BX, CX
+	JEQ	allsame
+
+diff4:
+	BSWAPL	BX	// reverse order of bytes
+	BSWAPL	CX
+	XORL	BX, CX	// find bit differences
+	BSRL	CX, CX	// index of highest bit difference
+	SHRL	CX, BX	// move a's bit to bottom
+	ANDL	$1, BX	// mask bit
+	LEAL	-1(BX*2), BX // 1/0 => +1/-1
+	MOVL	BX, (AX)
+	RET
+
+	// 0-3 bytes in common
+small:
+	LEAL	(BP*8), CX
+	NEGL	CX
+	JEQ	allsame
+
+	// load si
+	CMPB	SI, $0xfc
+	JA	si_high
+	MOVL	(SI), SI
+	JMP	si_finish
+si_high:
+	MOVL	-4(SI)(BP*1), SI
+	SHRL	CX, SI
+si_finish:
+	SHLL	CX, SI
+
+	// same for di
+	CMPB	DI, $0xfc
+	JA	di_high
+	MOVL	(DI), DI
+	JMP	di_finish
+di_high:
+	MOVL	-4(DI)(BP*1), DI
+	SHRL	CX, DI
+di_finish:
+	SHLL	CX, DI
+
+	BSWAPL	SI	// reverse order of bytes
+	BSWAPL	DI
+	XORL	SI, DI	// find bit differences
+	JEQ	allsame
+	BSRL	DI, CX	// index of highest bit difference
+	SHRL	CX, SI	// move a's bit to bottom
+	ANDL	$1, SI	// mask bit
+	LEAL	-1(SI*2), BX // 1/0 => +1/-1
+	MOVL	BX, (AX)
+	RET
+
+	// all the bytes in common are the same, so we just need
+	// to compare the lengths.
+allsame:
+	XORL	BX, BX
+	XORL	CX, CX
+	TESTL	DX, DX
+	SETLT	BX	// 1 if alen > blen
+	SETEQ	CX	// 1 if alen == blen
+	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
+	MOVL	BX, (AX)
+	RET
diff --git a/src/internal/bytealg/compare_amd64.s b/src/internal/bytealg/compare_amd64.s
new file mode 100644
index 0000000..8295acb
--- /dev/null
+++ b/src/internal/bytealg/compare_amd64.s
@@ -0,0 +1,262 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
+#ifdef GOEXPERIMENT_regabiargs
+	// AX = a_base (want in SI)
+	// BX = a_len  (want in BX)
+	// CX = a_cap  (unused)
+	// DI = b_base (want in DI)
+	// SI = b_len  (want in DX)
+	// R8 = b_cap  (unused)
+	MOVQ	SI, DX
+	MOVQ	AX, SI
+#else
+	MOVQ	a_base+0(FP), SI
+	MOVQ	a_len+8(FP), BX
+	MOVQ	b_base+24(FP), DI
+	MOVQ	b_len+32(FP), DX
+	LEAQ	ret+48(FP), R9
+#endif
+	JMP	cmpbody<>(SB)
+
+TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
+#ifdef GOEXPERIMENT_regabiargs
+	// AX = a_base (want in SI)
+	// BX = a_len  (want in BX)
+	// CX = b_base (want in DI)
+	// DI = b_len  (want in DX)
+	MOVQ	AX, SI
+	MOVQ	DI, DX
+	MOVQ	CX, DI
+#else
+	MOVQ	a_base+0(FP), SI
+	MOVQ	a_len+8(FP), BX
+	MOVQ	b_base+16(FP), DI
+	MOVQ	b_len+24(FP), DX
+	LEAQ	ret+32(FP), R9
+#endif
+	JMP	cmpbody<>(SB)
+
+// input:
+//   SI = a
+//   DI = b
+//   BX = alen
+//   DX = blen
+#ifndef GOEXPERIMENT_regabiargs
+//   R9 = address of output word (stores -1/0/1 here)
+#else
+// output:
+//   AX = output (-1/0/1)
+#endif
+TEXT cmpbody<>(SB),NOSPLIT,$0-0
+	CMPQ	SI, DI
+	JEQ	allsame
+	CMPQ	BX, DX
+	MOVQ	DX, R8
+	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
+	CMPQ	R8, $8
+	JB	small
+
+	CMPQ	R8, $63
+	JBE	loop
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
+	JEQ     big_loop_avx2
+	JMP	big_loop
+loop:
+	CMPQ	R8, $16
+	JBE	_0through16
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ	$0xffff, AX	// convert EQ to NE
+	JNE	diff16	// branch if at least one byte is not equal
+	ADDQ	$16, SI
+	ADDQ	$16, DI
+	SUBQ	$16, R8
+	JMP	loop
+
+diff64:
+	ADDQ	$48, SI
+	ADDQ	$48, DI
+	JMP	diff16
+diff48:
+	ADDQ	$32, SI
+	ADDQ	$32, DI
+	JMP	diff16
+diff32:
+	ADDQ	$16, SI
+	ADDQ	$16, DI
+	// AX = bit mask of differences
+diff16:
+	BSFQ	AX, BX	// index of first byte that differs
+	XORQ	AX, AX
+	MOVB	(SI)(BX*1), CX
+	CMPB	CX, (DI)(BX*1)
+	SETHI	AX
+	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
+#ifndef GOEXPERIMENT_regabiargs
+	MOVQ	AX, (R9)
+#endif
+	RET
+
+	// 0 through 16 bytes left, alen>=8, blen>=8
+_0through16:
+	CMPQ	R8, $8
+	JBE	_0through8
+	MOVQ	(SI), AX
+	MOVQ	(DI), CX
+	CMPQ	AX, CX
+	JNE	diff8
+_0through8:
+	MOVQ	-8(SI)(R8*1), AX
+	MOVQ	-8(DI)(R8*1), CX
+	CMPQ	AX, CX
+	JEQ	allsame
+
+	// AX and CX contain parts of a and b that differ.
+diff8:
+	BSWAPQ	AX	// reverse order of bytes
+	BSWAPQ	CX
+	XORQ	AX, CX
+	BSRQ	CX, CX	// index of highest bit difference
+	SHRQ	CX, AX	// move a's bit to bottom
+	ANDQ	$1, AX	// mask bit
+	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
+#ifndef GOEXPERIMENT_regabiargs
+	MOVQ	AX, (R9)
+#endif
+	RET
+
+	// 0-7 bytes in common
+small:
+	LEAQ	(R8*8), CX	// bytes left -> bits left
+	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
+	JEQ	allsame
+
+	// load bytes of a into high bytes of SI
+	CMPB	SI, $0xf8
+	JA	si_high
+	MOVQ	(SI), SI
+	JMP	si_finish
+si_high:
+	MOVQ	-8(SI)(R8*1), SI
+	SHRQ	CX, SI
+si_finish:
+	SHLQ	CX, SI
+
+	// load bytes of b into high bytes of DI
+	CMPB	DI, $0xf8
+	JA	di_high
+	MOVQ	(DI), DI
+	JMP	di_finish
+di_high:
+	MOVQ	-8(DI)(R8*1), DI
+	SHRQ	CX, DI
+di_finish:
+	SHLQ	CX, DI
+
+	BSWAPQ	SI	// reverse order of bytes
+	BSWAPQ	DI
+	XORQ	SI, DI	// find bit differences
+	JEQ	allsame
+	BSRQ	DI, CX	// index of highest bit difference
+	SHRQ	CX, SI	// move a's bit to bottom
+	ANDQ	$1, SI	// mask bit
+	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
+#ifndef GOEXPERIMENT_regabiargs
+	MOVQ	AX, (R9)
+#endif
+	RET
+
+allsame:
+	XORQ	AX, AX
+	XORQ	CX, CX
+	CMPQ	BX, DX
+	SETGT	AX	// 1 if alen > blen
+	SETEQ	CX	// 1 if alen == blen
+	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
+#ifndef GOEXPERIMENT_regabiargs
+	MOVQ	AX, (R9)
+#endif
+	RET
+
+	// this works for >= 64 bytes of data.
+big_loop:
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff16
+
+	MOVOU	16(SI), X0
+	MOVOU	16(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff32
+
+	MOVOU	32(SI), X0
+	MOVOU	32(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff48
+
+	MOVOU	48(SI), X0
+	MOVOU	48(DI), X1
+	PCMPEQB X0, X1
+	PMOVMSKB X1, AX
+	XORQ	$0xffff, AX
+	JNE	diff64
+
+	ADDQ	$64, SI
+	ADDQ	$64, DI
+	SUBQ	$64, R8
+	CMPQ	R8, $64
+	JBE	loop
+	JMP	big_loop
+
+	// Compare 64-bytes per loop iteration.
+	// Loop is unrolled and uses AVX2.
+big_loop_avx2:
+	VMOVDQU	(SI), Y2
+	VMOVDQU	(DI), Y3
+	VMOVDQU	32(SI), Y4
+	VMOVDQU	32(DI), Y5
+	VPCMPEQB Y2, Y3, Y0
+	VPMOVMSKB Y0, AX
+	XORL	$0xffffffff, AX
+	JNE	diff32_avx2
+	VPCMPEQB Y4, Y5, Y6
+	VPMOVMSKB Y6, AX
+	XORL	$0xffffffff, AX
+	JNE	diff64_avx2
+
+	ADDQ	$64, SI
+	ADDQ	$64, DI
+	SUBQ	$64, R8
+	CMPQ	R8, $64
+	JB	big_loop_avx2_exit
+	JMP	big_loop_avx2
+
+	// Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
+diff32_avx2:
+	VZEROUPPER
+	JMP diff16
+
+	// Same as diff32_avx2, but for last 32 bytes.
+diff64_avx2:
+	VZEROUPPER
+	JMP diff48
+
+	// For <64 bytes remainder jump to normal loop.
+big_loop_avx2_exit:
+	VZEROUPPER
+	JMP loop
diff --git a/src/internal/bytealg/compare_arm.s b/src/internal/bytealg/compare_arm.s
new file mode 100644
index 0000000..80d01a2
--- /dev/null
+++ b/src/internal/bytealg/compare_arm.s
@@ -0,0 +1,86 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-28
+	MOVW	a_base+0(FP), R2	// R2 = start of a
+	MOVW	a_len+4(FP), R0	// R0 = length of a
+	MOVW	b_base+12(FP), R3	// R3 = start of b
+	MOVW	b_len+16(FP), R1	// R1 = length of b
+	ADD	$28, R13, R7	// R7 = R13+28; cmpbody stores the -1/0/1 result through R7
+	B	cmpbody<>(SB)	// tail jump: cmpbody writes the result and executes the RET
+
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
+	MOVW	a_base+0(FP), R2	// R2 = start of a
+	MOVW	a_len+4(FP), R0	// R0 = length of a
+	MOVW	b_base+8(FP), R3	// R3 = start of b
+	MOVW	b_len+12(FP), R1	// R1 = length of b
+	ADD	$20, R13, R7	// R7 = R13+20; cmpbody stores the -1/0/1 result through R7
+	B	cmpbody<>(SB)	// tail jump: cmpbody writes the result and executes the RET
+
+// Shared three-way compare used by Compare and runtime·cmpstring. On entry:
+// R0 is the length of a
+// R1 is the length of b
+// R2 points to the start of a
+// R3 points to the start of b
+// R7 points to return value (-1/0/1 will be written here)
+//
+// On exit:
+// R0 holds the value stored through R7; R2, R3, R4, R5, R6 and R8 are clobbered
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	CMP	R2, R3	// do a and b start at the same address?
+	BEQ	samebytes	// yes: skip the byte scan, only the lengths decide
+	CMP 	R0, R1
+	MOVW 	R0, R6
+	MOVW.LT	R1, R6		// R6 is min(R0, R1)
+
+	CMP	$0, R6
+	BEQ	samebytes	// no common prefix to scan; only the lengths decide
+	CMP	$4, R6	// sets the flags tested by the BLT two lines down
+	ADD	R2, R6		// R2 is current byte in a, R6 is the end of the range to compare
+	BLT	byte_loop	// length < 4
+	AND	$3, R2, R8
+	CMP	$0, R8
+	BNE	byte_loop	// unaligned a, use byte-wise compare (TODO: try to align a)
+aligned_a:
+	AND	$3, R3, R8
+	CMP	$0, R8
+	BNE	byte_loop	// unaligned b, use byte-wise compare
+	AND	$0xfffffffc, R6, R8	// R8 = end address with the low two bits cleared: where the word loop stops
+	// length >= 4
+chunk4_loop:
+	MOVW.P	4(R2), R4	// load one word from a and advance R2 by 4
+	MOVW.P	4(R3), R5	// load one word from b and advance R3 by 4
+	CMP	R4, R5
+	BNE	cmp	// words differ: rewind and find the differing byte
+	CMP	R2, R8
+	BNE	chunk4_loop	// more whole words left
+	CMP	R2, R6
+	BEQ	samebytes	// all compared bytes were the same; compare lengths
+byte_loop:
+	MOVBU.P	1(R2), R4	// load one byte from a and advance R2
+	MOVBU.P	1(R3), R5	// load one byte from b and advance R3
+	CMP	R4, R5
+	BNE	ret	// first differing byte found; flags carry the ordering
+	CMP	R2, R6
+	BNE	byte_loop	// not yet at the end of the common range
+samebytes:
+	CMP	R0, R1	// common prefix is equal: order by length
+	MOVW.LT	$1, R0
+	MOVW.GT	$-1, R0
+	MOVW.EQ	$0, R0
+	MOVW	R0, (R7)
+	RET
+ret:
+	// bytes differed; flags are still those of the CMP R4, R5 in byte_loop
+	MOVW.LT	$1, R0
+	MOVW.GT	$-1, R0
+	MOVW	R0, (R7)
+	RET
+cmp:
+	SUB	$4, R2, R2	// undo the post-increment so R2 points at the differing word
+	SUB	$4, R3, R3	// same for R3
+	B	byte_loop	// rescan that word bytewise to find the first differing byte
diff --git a/src/internal/bytealg/compare_arm64.s b/src/internal/bytealg/compare_arm64.s
new file mode 100644
index 0000000..56d56f2
--- /dev/null
+++ b/src/internal/bytealg/compare_arm64.s
@@ -0,0 +1,125 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
+	MOVD	a_base+0(FP), R2	// R2 = start of a
+	MOVD	a_len+8(FP), R0	// R0 = length of a
+	MOVD	b_base+24(FP), R3	// R3 = start of b
+	MOVD	b_len+32(FP), R1	// R1 = length of b
+	MOVD	$ret+48(FP), R7	// R7 = address of the result slot
+	B	cmpbody<>(SB)	// tail jump: cmpbody stores the result through R7 and returns
+
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
+	MOVD	a_base+0(FP), R2	// R2 = start of a
+	MOVD	a_len+8(FP), R0	// R0 = length of a
+	MOVD	b_base+16(FP), R3	// R3 = start of b
+	MOVD	b_len+24(FP), R1	// R1 = length of b
+	MOVD	$ret+32(FP), R7	// R7 = address of the result slot
+	B	cmpbody<>(SB)	// tail jump: cmpbody stores the result through R7 and returns
+
+// Shared three-way compare used by Compare and runtime·cmpstring. On entry:
+// R0 is the length of a
+// R1 is the length of b
+// R2 points to the start of a
+// R3 points to the start of b
+// R7 points to return value (-1/0/1 will be written here)
+//
+// On exit:
+// R4, R5, R6, R8, R9 and R10 are clobbered (R2 and R3 are advanced as well)
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	CMP	R2, R3
+	BEQ	samebytes         // same starting pointers; compare lengths
+	CMP	R0, R1
+	CSEL	LT, R1, R0, R6    // R6 is min(R0, R1)
+
+	CBZ	R6, samebytes	// nothing to scan; only the lengths decide
+	BIC	$0xf, R6, R10	// R10 = R6 with the low four bits cleared (bytes covered by 16-byte chunks)
+	CBZ	R10, small        // length < 16
+	ADD	R2, R10           // end of chunk16
+	// length >= 16
+chunk16_loop:
+	LDP.P	16(R2), (R4, R8)	// load 16 bytes of a into R4,R8 and advance R2
+	LDP.P	16(R3), (R5, R9)	// load 16 bytes of b into R5,R9 and advance R3
+	CMP	R4, R5
+	BNE	cmp	// first doubleword differs
+	CMP	R8, R9
+	BNE	cmpnext	// second doubleword differs
+	CMP	R10, R2
+	BNE	chunk16_loop
+	AND	$0xf, R6, R6	// R6 = bytes left after the 16-byte chunks
+	CBZ	R6, samebytes
+	SUBS	$8, R6
+	BLT	tail	// fewer than 8 left: R6 is negative, so tail re-reads bytes already found equal
+	// the length of tail >= 8 bytes
+	MOVD.P	8(R2), R4
+	MOVD.P	8(R3), R5
+	CMP	R4, R5
+	BNE	cmp
+	SUB	$8, R6	// R6 becomes the (non-positive) offset from R2 to the last 8 bytes
+	// compare last 8 bytes
+tail:
+	MOVD	(R2)(R6), R4
+	MOVD	(R3)(R6), R5
+	CMP	R4, R5
+	BEQ	samebytes
+cmp:
+	REV	R4, R4	// byte-reverse both values so the unsigned compare below
+	REV	R5, R5	// is decided by the first differing byte in memory order
+	CMP	R4, R5
+ret:
+	MOVD	$1, R4
+	CNEG	HI, R4, R4	// result is +1, negated to -1 when the HI condition holds
+	MOVD	R4, (R7)
+	RET
+small:
+	TBZ	$3, R6, lt_8	// bit 3 of the length clear: fewer than 8 bytes
+	MOVD	(R2), R4
+	MOVD	(R3), R5
+	CMP	R4, R5
+	BNE	cmp
+	SUBS	$8, R6
+	BEQ	samebytes	// exactly 8 bytes and they matched
+	ADD	$8, R2
+	ADD	$8, R3
+	SUB	$8, R6	// negative offset so tail loads the final 8 bytes (overlapping the ones just compared)
+	B	tail
+lt_8:
+	TBZ	$2, R6, lt_4	// bit 2 clear: no 4-byte piece
+	MOVWU	(R2), R4
+	MOVWU	(R3), R5
+	CMPW	R4, R5
+	BNE	cmp
+	SUBS	$4, R6
+	BEQ	samebytes
+	ADD	$4, R2
+	ADD	$4, R3
+lt_4:
+	TBZ	$1, R6, lt_2	// bit 1 clear: no 2-byte piece
+	MOVHU	(R2), R4
+	MOVHU	(R3), R5
+	CMPW	R4, R5
+	BNE	cmp
+	ADD	$2, R2
+	ADD	$2, R3
+lt_2:
+	TBZ	$0, R6, samebytes	// bit 0 clear: no final single byte
+one:
+	MOVBU	(R2), R4
+	MOVBU	(R3), R5
+	CMPW	R4, R5
+	BNE	ret	// single bytes need no byte reversal; go straight to the result
+samebytes:
+	CMP	R1, R0	// common prefix equal: order by length
+	CSET	NE, R4	// R4 = 1 if the lengths differ, else 0
+	CNEG	LO, R4, R4	// negate to -1 when the LO condition holds
+	MOVD	R4, (R7)
+	RET
+cmpnext:
+	REV	R8, R4	// same as cmp, but for the second doubleword of the pair
+	REV	R9, R5
+	CMP	R4, R5
+	B	ret
diff --git a/src/internal/bytealg/compare_generic.go b/src/internal/bytealg/compare_generic.go
new file mode 100644
index 0000000..0690d0c
--- /dev/null
+++ b/src/internal/bytealg/compare_generic.go
@@ -0,0 +1,61 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64 && !s390x && !arm && !arm64 && !ppc64 && !ppc64le && !mips && !mipsle && !wasm && !mips64 && !mips64le
+// +build !386,!amd64,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le
+
+package bytealg
+
+import _ "unsafe" // for go:linkname
+
+func Compare(a, b []byte) int { // three-way lexicographic compare: returns -1, 0 or +1
+	l := len(a) // l ends up as min(len(a), len(b))
+	if len(b) < l {
+		l = len(b)
+	}
+	if l == 0 || &a[0] == &b[0] { // one slice is empty, or both start at the same byte: skip the scan
+		goto samebytes
+	}
+	for i := 0; i < l; i++ {
+		c1, c2 := a[i], b[i]
+		if c1 < c2 { // first differing byte decides the result
+			return -1
+		}
+		if c1 > c2 {
+			return +1
+		}
+	}
+samebytes: // the first l bytes are equal, so the shorter slice orders first
+	if len(a) < len(b) {
+		return -1
+	}
+	if len(a) > len(b) {
+		return +1
+	}
+	return 0
+}
+
+//go:linkname runtime_cmpstring runtime.cmpstring
+func runtime_cmpstring(a, b string) int { // string counterpart of Compare: returns -1, 0 or +1
+	l := len(a) // l ends up as min(len(a), len(b))
+	if len(b) < l {
+		l = len(b)
+	}
+	for i := 0; i < l; i++ {
+		c1, c2 := a[i], b[i]
+		if c1 < c2 { // first differing byte decides the result
+			return -1
+		}
+		if c1 > c2 {
+			return +1
+		}
+	}
+	if len(a) < len(b) { // common prefix is equal: the shorter string orders first
+		return -1
+	}
+	if len(a) > len(b) {
+		return +1
+	}
+	return 0
+}
diff --git a/src/internal/bytealg/compare_mips64x.s b/src/internal/bytealg/compare_mips64x.s
new file mode 100644
index 0000000..b472e51
--- /dev/null
+++ b/src/internal/bytealg/compare_mips64x.s
@@ -0,0 +1,89 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT,$0-56
+	MOVV	a_base+0(FP), R3	// R3 = start of a
+	MOVV	b_base+24(FP), R4	// R4 = start of b
+	MOVV	a_len+8(FP), R1	// R1 = length of a
+	MOVV	b_len+32(FP), R2	// R2 = length of b
+	MOVV	$ret+48(FP), R9	// R9 = address of the result slot
+	JMP	cmpbody<>(SB)	// tail jump: cmpbody stores the result through R9 and returns
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
+	MOVV	a_base+0(FP), R3	// R3 = start of a
+	MOVV	b_base+16(FP), R4	// R4 = start of b
+	MOVV	a_len+8(FP), R1	// R1 = length of a
+	MOVV	b_len+24(FP), R2	// R2 = length of b
+	MOVV	$ret+32(FP), R9	// R9 = address of the result slot
+	JMP	cmpbody<>(SB)	// tail jump: cmpbody stores the result through R9 and returns
+
+// Shared three-way compare used by Compare and runtime·cmpstring. On entry:
+// R1 length of a
+// R2 length of b
+// R3 points to the start of a
+// R4 points to the start of b
+// R9 points to the return value (-1/0/1)
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
+	BEQ	R3, R4, samebytes // same start of a and b
+
+	SGTU	R1, R2, R7	// R7 = 1 if R1 > R2
+	BNE	R0, R7, r2_lt_r1
+	MOVV	R1, R10	// R1 <= R2: R10 = R1
+	JMP	entry
+r2_lt_r1:
+	MOVV	R2, R10	// R10 is min(R1, R2)
+entry:
+	ADDV	R3, R10, R8	// R3 start of a, R8 end of a
+	BEQ	R3, R8, samebytes // length is 0
+
+	SRLV	$4, R10		// R10 is number of chunks
+	BEQ	R0, R10, byte_loop	// fewer than 16 bytes: bytewise only
+
+	// make sure both a and b are aligned.
+	OR	R3, R4, R11
+	AND	$7, R11
+	BNE	R0, R11, byte_loop	// either pointer has low bits set: bytewise only
+
+chunk16_loop:
+	BEQ	R0, R10, byte_loop	// no whole chunks left: finish bytewise
+	MOVV	(R3), R6
+	MOVV	(R4), R7
+	BNE	R6, R7, byte_loop	// first doubleword differs; pointers not yet advanced, rescan bytewise
+	MOVV	8(R3), R13
+	MOVV	8(R4), R14
+	ADDV	$16, R3
+	ADDV	$16, R4
+	SUBVU	$1, R10
+	BEQ	R13, R14, chunk16_loop
+	SUBV	$8, R3	// second doubleword differs: step back onto it
+	SUBV	$8, R4	// and fall into the byte loop to find the differing byte
+
+byte_loop:
+	BEQ	R3, R8, samebytes	// reached the end of the common range
+	MOVBU	(R3), R6
+	ADDVU	$1, R3
+	MOVBU	(R4), R7
+	ADDVU	$1, R4
+	BEQ	R6, R7, byte_loop
+
+byte_cmp:
+	SGTU	R6, R7, R8 // R8 = 1 if (R6 > R7)
+	BNE	R0, R8, ret
+	MOVV	$-1, R8	// bytes differ and R6 is not greater: result is -1
+	JMP	ret
+
+samebytes:
+	SGTU	R1, R2, R6	// R6 = 1 if R1 > R2
+	SGTU	R2, R1, R7	// R7 = 1 if R2 > R1
+	SUBV	R7, R6, R8	// R8 = R6 - R7: +1, 0 or -1 from the lengths
+
+ret:
+	MOVV	R8, (R9)
+	RET
diff --git a/src/internal/bytealg/compare_mipsx.s b/src/internal/bytealg/compare_mipsx.s
new file mode 100644
index 0000000..dcc4916
--- /dev/null
+++ b/src/internal/bytealg/compare_mipsx.s
@@ -0,0 +1,73 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+// +build mips mipsle
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT,$0-28
+	MOVW	a_base+0(FP), R3	// R3 = start of a
+	MOVW	b_base+12(FP), R4	// R4 = start of b
+	MOVW	a_len+4(FP), R1	// R1 = length of a
+	MOVW	b_len+16(FP), R2	// R2 = length of b
+	BEQ	R3, R4, samebytes	// same start of a and b: only the lengths decide
+	SGTU	R1, R2, R7	// R7 = 1 if R1 > R2
+	MOVW	R1, R8
+	CMOVN	R7, R2, R8	// R8 is min(R1, R2)
+
+	ADDU	R3, R8	// R3 is current byte in a, R8 is the end (exclusive) of the range in a to compare
+loop:
+	BEQ	R3, R8, samebytes	// all compared bytes were the same; compare lengths
+
+	MOVBU	(R3), R6
+	ADDU	$1, R3
+	MOVBU	(R4), R7
+	ADDU	$1, R4
+	BEQ	R6, R7 , loop
+
+	SGTU	R6, R7, R8	// bytes differed: R8 = 1 if R6 > R7
+	MOVW	$-1, R6
+	CMOVZ	R8, R6, R8	// if R8 is 0, replace it with -1
+	JMP	cmp_ret
+samebytes:
+	SGTU	R1, R2, R6	// R6 = 1 if R1 > R2
+	SGTU	R2, R1, R7	// R7 = 1 if R2 > R1
+	SUBU	R7, R6, R8	// R8 = R6 - R7: +1, 0 or -1 from the lengths
+cmp_ret:
+	MOVW	R8, ret+24(FP)
+	RET
+
+TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
+	MOVW	a_base+0(FP), R3	// R3 = start of a
+	MOVW	a_len+4(FP), R1	// R1 = length of a
+	MOVW	b_base+8(FP), R4	// R4 = start of b
+	MOVW	b_len+12(FP), R2	// R2 = length of b
+	BEQ	R3, R4, samebytes	// same start of a and b: only the lengths decide
+	SGTU	R1, R2, R7	// R7 = 1 if R1 > R2
+	MOVW	R1, R8
+	CMOVN	R7, R2, R8	// R8 is min(R1, R2)
+
+	ADDU	R3, R8	// R3 is current byte in a, R8 is the end (exclusive) of the range in a to compare
+loop:
+	BEQ	R3, R8, samebytes	// all compared bytes were the same; compare lengths
+
+	MOVBU	(R3), R6
+	ADDU	$1, R3
+	MOVBU	(R4), R7
+	ADDU	$1, R4
+	BEQ	R6, R7 , loop
+	// bytes differed
+	SGTU	R6, R7, R8	// R8 = 1 if R6 > R7
+	MOVW	$-1, R6
+	CMOVZ	R8, R6, R8	// if R8 is 0, replace it with -1
+	JMP	cmp_ret
+samebytes:
+	SGTU	R1, R2, R6	// R6 = 1 if R1 > R2
+	SGTU	R2, R1, R7	// R7 = 1 if R2 > R1
+	SUBU	R7, R6, R8	// R8 = R6 - R7: +1, 0 or -1 from the lengths
+cmp_ret:
+	MOVW	R8, ret+16(FP)
+	RET
diff --git a/src/internal/bytealg/compare_native.go b/src/internal/bytealg/compare_native.go
new file mode 100644
index 0000000..baa188f
--- /dev/null
+++ b/src/internal/bytealg/compare_native.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64 || s390x || arm || arm64 || ppc64 || ppc64le || mips || mipsle || wasm || mips64 || mips64le
+// +build 386 amd64 s390x arm arm64 ppc64 ppc64le mips mipsle wasm mips64 mips64le
+
+package bytealg
+
+import _ "unsafe" // For go:linkname
+
+//go:noescape
+func Compare(a, b []byte) int // no Go body: ·Compare is defined in the per-architecture compare_*.s files
+
+// The declaration below generates ABI wrappers for functions
+// implemented in assembly in this package but declared in another
+// package (here runtime.cmpstring, whose body lives in the compare_*.s files).
+
+//go:linkname abigen_runtime_cmpstring runtime.cmpstring
+func abigen_runtime_cmpstring(a, b string) int // declaration only; there is no Go body
diff --git a/src/internal/bytealg/compare_ppc64x.s b/src/internal/bytealg/compare_ppc64x.s
new file mode 100644
index 0000000..83444fa
--- /dev/null
+++ b/src/internal/bytealg/compare_ppc64x.s
@@ -0,0 +1,278 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
+	MOVD	a_base+0(FP), R5	// R5 = a addr
+	MOVD	b_base+24(FP), R6	// R6 = b addr
+	MOVD	a_len+8(FP), R3	// R3 = a len
+	CMP	R5,R6,CR7	// CR7 = comparison of the two base addresses
+	MOVD	b_len+32(FP), R4	// R4 = b len
+	MOVD	$ret+48(FP), R7	// R7 = addr of return value
+	CMP	R3,R4,CR6	// CR6 = comparison of the two lengths
+	BEQ	CR7,equal	// same base address: only the lengths decide
+
+#ifdef	GOARCH_ppc64le
+	BR	cmpbodyLE<>(SB)
+#else
+	BR      cmpbodyBE<>(SB)
+#endif
+
+equal:
+	BEQ	CR6,done	// same address and same length: result 0
+	MOVD	$1, R8
+	BGT	CR6,greater	// a is longer: keep +1
+	NEG	R8	// a is shorter: result -1
+
+greater:
+	MOVD	R8, (R7)
+	RET
+
+done:
+	MOVD	$0, (R7)
+	RET
+
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
+	MOVD	a_base+0(FP), R5	// R5 = a addr
+	MOVD	b_base+16(FP), R6	// R6 = b addr
+	MOVD	a_len+8(FP), R3	// R3 = a len
+	CMP	R5,R6,CR7	// CR7 = comparison of the two base addresses
+	MOVD	b_len+24(FP), R4	// R4 = b len
+	MOVD	$ret+32(FP), R7	// R7 = addr of return value
+	CMP	R3,R4,CR6	// CR6 = comparison of the two lengths
+	BEQ	CR7,equal	// same base address: only the lengths decide
+
+#ifdef	GOARCH_ppc64le
+	BR	cmpbodyLE<>(SB)
+#else
+	BR      cmpbodyBE<>(SB)
+#endif
+
+equal:
+	BEQ	CR6,done	// same address and same length: result 0
+	MOVD	$1, R8
+	BGT	CR6,greater	// a is longer: keep +1
+	NEG	R8	// a is shorter: result -1
+
+greater:
+	MOVD	R8, (R7)
+	RET
+
+done:
+	MOVD	$0, (R7)
+	RET
+
+// Do an efficient memcmp for ppc64le
+// R3 = a len
+// R4 = b len
+// R5 = a addr
+// R6 = b addr
+// R7 = addr of return value
+TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD	R3,R8		// set up length
+	CMP	R3,R4,CR2	// length comparison, kept in CR2 for the final tie-break
+	BC	12,8,setuplen	// BLT CR2
+	MOVD	R4,R8		// use R4 for comparison len
+setuplen:
+	MOVD	R8,CTR		// set up loop counter
+	CMP	R8,$8		// only optimize >=8
+	BLT	simplecheck
+	DCBT	(R5)		// cache hint
+	DCBT	(R6)
+	CMP	R8,$32		// optimize >= 32
+	MOVD	R8,R9
+	BLT	setup8a		// 8 byte moves only
+setup32a:
+	SRADCC	$5,R8,R9	// number of 32 byte chunks
+	MOVD	R9,CTR
+
+        // Special processing for 32 bytes or longer.
+        // Loading this way is faster and correct as long as the
+	// doublewords being compared are equal. Once they
+	// are found unequal, reload them in proper byte order
+	// to determine greater or less than.
+loop32a:
+	MOVD	0(R5),R9	// doublewords to compare
+	MOVD	0(R6),R10	// get 4 doublewords
+	MOVD	8(R5),R14
+	MOVD	8(R6),R15
+	CMPU	R9,R10		// bytes equal?
+	MOVD	$0,R16		// set up for cmpne
+	BNE	cmpne		// further compare for LT or GT
+	MOVD	16(R5),R9	// get next pair of doublewords
+	MOVD	16(R6),R10
+	CMPU	R14,R15		// bytes match?
+	MOVD	$8,R16		// set up for cmpne
+	BNE	cmpne		// further compare for LT or GT
+	MOVD	24(R5),R14	// get next pair of doublewords
+	MOVD    24(R6),R15
+	CMPU	R9,R10		// bytes match?
+	MOVD	$16,R16		// set up for cmpne
+	BNE	cmpne		// further compare for LT or GT
+	MOVD	$-8,R16		// for cmpne: R5,R6 are bumped by 32 below, so the 4th doubleword is at -8
+	ADD	$32,R5		// bump up to next 32
+	ADD	$32,R6
+	CMPU    R14,R15		// bytes match?
+	BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and the compare was equal
+	BNE	cmpne
+	ANDCC	$24,R8,R9	// Any 8 byte chunks?
+	BEQ	leftover	// and result is 0
+setup8a:
+	SRADCC	$3,R9,R9	// get the 8 byte count
+	BEQ	leftover	// shifted value is 0
+	MOVD	R9,CTR		// loop count for doublewords
+loop8:
+	MOVDBR	(R5+R0),R9	// doublewords to compare
+	MOVDBR	(R6+R0),R10	// LE compare order
+	ADD	$8,R5
+	ADD	$8,R6
+	CMPU	R9,R10		// match?
+	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and the compare was equal
+	BGT	greater
+	BLT	less
+leftover:
+	ANDCC	$7,R8,R9	// check for leftover bytes
+	MOVD	R9,CTR		// save the ctr
+	BNE	simple		// leftover bytes
+	BC	12,10,equal	// test CR2 for length comparison
+	BC	12,8,less	// CR2 LT: 1st len < 2nd len
+	BR	greater
+simplecheck:
+	CMP	R8,$0		// remaining compare length 0
+	BNE	simple		// do simple compare
+	BC	12,10,equal	// test CR2 for length comparison
+	BC	12,8,less	// 1st len < 2nd len, result less
+	BR	greater		// 1st len > 2nd len must be greater
+simple:
+	MOVBZ	0(R5), R9	// get byte from 1st operand
+	ADD	$1,R5
+	MOVBZ	0(R6), R10	// get byte from 2nd operand
+	ADD	$1,R6
+	CMPU	R9, R10
+	BC	8,2,simple	// decrement CTR; loop while CTR != 0 and the bytes were equal
+	BGT	greater		// 1st > 2nd
+	BLT	less		// 1st < 2nd
+	BC	12,10,equal	// test CR2 for length comparison
+	BC	12,9,greater	// CR2 GT: 1st len > 2nd len
+	BR	less		// must be less
+cmpne:				// only here if not equal
+	MOVDBR	(R5+R16),R8	// reload in reverse order
+	MOVDBR	(R6+R16),R9
+	CMPU	R8,R9		// compare correct endianness
+	BGT	greater		// here only if NE
+less:
+	MOVD	$-1,R3
+	MOVD	R3,(R7)		// return value if A < B
+	RET
+equal:
+	MOVD	$0,(R7)		// return value if A == B
+	RET
+greater:
+	MOVD	$1,R3
+	MOVD	R3,(R7)		// return value if A > B
+	RET
+
+// Do an efficient memcmp for ppc64 (BE)
+// R3 = a len
+// R4 = b len
+// R5 = a addr
+// R6 = b addr
+// R7 = addr of return value
+TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD	R3,R8		// set up length
+	CMP	R3,R4,CR2	// length comparison, kept in CR2 for the final tie-break
+	BC	12,8,setuplen	// BLT CR2
+	MOVD	R4,R8		// use R4 for comparison len
+setuplen:
+	MOVD	R8,CTR		// set up loop counter
+	CMP	R8,$8		// only optimize >=8
+	BLT	simplecheck
+	DCBT	(R5)		// cache hint
+	DCBT	(R6)
+	CMP	R8,$32		// optimize >= 32
+	MOVD	R8,R9
+	BLT	setup8a		// 8 byte moves only
+
+setup32a:
+	SRADCC	$5,R8,R9	// number of 32 byte chunks
+	MOVD	R9,CTR
+loop32a:
+	MOVD	0(R5),R9	// doublewords to compare
+	MOVD	0(R6),R10	// get 4 doublewords
+	MOVD	8(R5),R14
+	MOVD	8(R6),R15
+	CMPU	R9,R10		// bytes equal?
+	BLT	less		// found to be less
+	BGT	greater		// found to be greater
+	MOVD	16(R5),R9	// get next pair of doublewords
+	MOVD	16(R6),R10
+	CMPU	R14,R15		// bytes match?
+	BLT	less		// found less
+	BGT	greater		// found greater
+	MOVD	24(R5),R14	// get next pair of doublewords
+	MOVD	24(R6),R15
+	CMPU	R9,R10		// bytes match?
+	BLT	less		// found to be less
+	BGT	greater		// found to be greater
+	ADD	$32,R5		// bump up to next 32
+	ADD	$32,R6
+	CMPU	R14,R15		// bytes match?
+	BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and the compare was equal
+	BLT	less		// with BE, byte ordering is
+	BGT	greater		// good for compare
+	ANDCC	$24,R8,R9	// Any 8 byte chunks?
+	BEQ	leftover	// and result is 0
+setup8a:
+	SRADCC	$3,R9,R9	// get the 8 byte count
+	BEQ	leftover	// shifted value is 0
+	MOVD	R9,CTR		// loop count for doublewords
+loop8:
+	MOVD	(R5),R9	// doublewords to compare (plain loads, no byte reversal on BE)
+	MOVD	(R6),R10
+	ADD	$8,R5
+	ADD	$8,R6
+	CMPU	R9,R10		// match?
+	BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and the compare was equal
+	BGT	greater
+	BLT	less
+leftover:
+	ANDCC	$7,R8,R9	// check for leftover bytes
+	MOVD	R9,CTR		// save the ctr
+	BNE	simple		// leftover bytes
+	BC	12,10,equal	// test CR2 for length comparison
+	BC	12,8,less	// CR2 LT: 1st len < 2nd len
+	BR	greater
+simplecheck:
+	CMP	R8,$0		// remaining compare length 0
+	BNE	simple		// do simple compare
+	BC	12,10,equal	// test CR2 for length comparison
+	BC 	12,8,less	// 1st len < 2nd len, result less
+	BR	greater		// 1st len > 2nd len must be greater
+simple:
+	MOVBZ	0(R5),R9	// get byte from 1st operand
+	ADD	$1,R5
+	MOVBZ	0(R6),R10	// get byte from 2nd operand
+	ADD	$1,R6
+	CMPU	R9,R10
+	BC	8,2,simple	// decrement CTR; loop while CTR != 0 and the bytes were equal
+	BGT	greater		// 1st > 2nd
+	BLT	less		// 1st < 2nd
+	BC	12,10,equal	// test CR2 for length comparison
+	BC	12,9,greater	// CR2 GT: 1st len > 2nd len; otherwise fall through to less
+less:
+	MOVD	$-1,R3
+	MOVD    R3,(R7)		// return value if A < B
+	RET
+equal:
+	MOVD    $0,(R7)		// return value if A == B
+	RET
+greater:
+	MOVD	$1,R3
+	MOVD	R3,(R7)		// return value if A > B
+	RET
diff --git a/src/internal/bytealg/compare_s390x.s b/src/internal/bytealg/compare_s390x.s
new file mode 100644
index 0000000..5394548
--- /dev/null
+++ b/src/internal/bytealg/compare_s390x.s
@@ -0,0 +1,69 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
+	MOVD	a_base+0(FP), R3	// R3 = a
+	MOVD	a_len+8(FP), R4	// R4 = alen
+	MOVD	b_base+24(FP), R5	// R5 = b
+	MOVD	b_len+32(FP), R6	// R6 = blen
+	LA	ret+48(FP), R7	// R7 = address of the result word
+	BR	cmpbody<>(SB)	// tail jump: cmpbody stores -1/0/1 through R7 and returns
+
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
+	MOVD	a_base+0(FP), R3	// R3 = a
+	MOVD	a_len+8(FP), R4	// R4 = alen
+	MOVD	b_base+16(FP), R5	// R5 = b
+	MOVD	b_len+24(FP), R6	// R6 = blen
+	LA	ret+32(FP), R7	// R7 = address of the result word
+	BR	cmpbody<>(SB)	// tail jump: cmpbody stores -1/0/1 through R7 and returns
+
+// Shared three-way compare used by Compare and runtime·cmpstring. Input:
+//   R3 = a
+//   R4 = alen
+//   R5 = b
+//   R6 = blen
+//   R7 = address of output word (stores -1/0/1 here)
+TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	CMPBEQ	R3, R5, cmplengths	// same start address: only the lengths decide
+	MOVD	R4, R8
+	CMPBLE	R4, R6, amin
+	MOVD	R6, R8	// R8 = min(alen, blen)
+amin:
+	CMPBEQ	R8, $0, cmplengths	// nothing to scan
+	CMP	R8, $256
+	BLE	tail	// 256 bytes or fewer: a single variable-length compare
+loop:
+	CLC	$256, 0(R3), 0(R5)	// compare one 256-byte block
+	BGT	gt
+	BLT	lt
+	SUB	$256, R8
+	MOVD	$256(R3), R3	// advance a by 256
+	MOVD	$256(R5), R5	// advance b by 256
+	CMP	R8, $256
+	BGT	loop
+tail:
+	SUB	$1, R8	// presumably because the CLC length field encodes length-1 - confirm against the ISA
+	EXRL	$cmpbodyclc<>(SB), R8	// execute the CLC in cmpbodyclc, modified by R8 (the remaining length)
+	BGT	gt
+	BLT	lt
+cmplengths:
+	CMP	R4, R6	// compared bytes are equal: order by length
+	BEQ	eq
+	BLT	lt
+gt:
+	MOVD	$1, 0(R7)
+	RET
+lt:
+	MOVD	$-1, 0(R7)
+	RET
+eq:
+	MOVD	$0, 0(R7)
+	RET
+
+TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
+	CLC	$1, 0(R3), 0(R5)	// target of the EXRL in cmpbody's tail, which executes it with R8 applied
+	RET
diff --git a/src/internal/bytealg/compare_wasm.s b/src/internal/bytealg/compare_wasm.s
new file mode 100644
index 0000000..dc8fb33
--- /dev/null
+++ b/src/internal/bytealg/compare_wasm.s
@@ -0,0 +1,115 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare(SB), NOSPLIT, $0-56
+	Get SP	// presumably the base address consumed by the I64Store below - confirm
+	I64Load a_base+0(FP)	// push a
+	I64Load a_len+8(FP)	// push alen
+	I64Load b_base+24(FP)	// push b
+	I64Load b_len+32(FP)	// push blen
+	Call cmpbody<>(SB)	// params: a, alen, b, blen; yields -1/0/1
+	I64Store ret+48(FP)	// store the result into ret
+	RET
+
+TEXT runtime·cmpstring(SB), NOSPLIT, $0-40
+	Get SP	// presumably the base address consumed by the I64Store below - confirm
+	I64Load a_base+0(FP)	// push a
+	I64Load a_len+8(FP)	// push alen
+	I64Load b_base+16(FP)	// push b
+	I64Load b_len+24(FP)	// push blen
+	Call cmpbody<>(SB)	// params: a, alen, b, blen; yields -1/0/1
+	I64Store ret+32(FP)	// store the result into ret
+	RET
+
+// params: a, alen, b, blen (in R0, R1, R2, R3)
+// ret: -1/0/1
+TEXT cmpbody<>(SB), NOSPLIT, $0-0
+	// len = min(alen, blen)
+	Get R1
+	Get R3
+	Get R1
+	Get R3
+	I64LtU
+	Select	// picks R1 when R1 < R3, otherwise R3
+	Set R4	// R4 = len
+
+	Get R0
+	I32WrapI64
+	Get R2
+	I32WrapI64
+	Get R4
+	I32WrapI64
+	Call memcmp<>(SB)	// memcmp(a, b, len) on the 32-bit truncated values
+	I64ExtendI32S
+	Tee R5	// R5 = sign-extended memcmp result, also left on the stack
+
+	I64Eqz
+	If
+		// check length
+		Get R1
+		Get R3
+		I64Sub
+		Set R5	// common prefix equal: R5 = alen - blen
+	End
+
+	I64Const $0
+	I64Const $-1
+	I64Const $1
+	Get R5
+	I64Const $0
+	I64LtS
+	Select	// -1 if R5 < 0, otherwise 1
+	Get R5
+	I64Eqz
+	Select	// 0 if R5 == 0, otherwise the -1/1 chosen above
+	Return
+
+// compiled with emscripten
+// params: a, b, len (in R0, R1, R2)
+// ret: <0/0/>0
+TEXT memcmp<>(SB), NOSPLIT, $0-0
+	Get R2
+	If $1	// len != 0; the Else arm at the bottom handles len == 0
+	Loop
+	Get R0
+	I32Load8S $0
+	Tee R3	// R3 = current byte of a
+	Get R1
+	I32Load8S $0
+	Tee R4	// R4 = current byte of b
+	I32Eq
+	If	// bytes are equal: advance and keep going
+	Get R0
+	I32Const $1
+	I32Add
+	Set R0	// a++
+	Get R1
+	I32Const $1
+	I32Add
+	Set R1	// b++
+	I32Const $0	// result value used if the count runs out below
+	Get R2
+	I32Const $-1
+	I32Add
+	Tee R2	// len--
+	I32Eqz
+	BrIf $3	// len reached 0: presumably leaves with the 0 pushed above as the result - confirm branch depth
+	Drop	// discard that 0 again
+	Br $1	// presumably continues the Loop - confirm branch depth
+	End
+	End
+	Get R3	// bytes differed: result is (R3 & 255) - (R4 & 255)
+	I32Const $255
+	I32And
+	Get R4
+	I32Const $255
+	I32And
+	I32Sub
+	Else
+	I32Const $0	// len == 0: result is 0
+	End
+	Return
diff --git a/src/internal/bytealg/count_amd64.s b/src/internal/bytealg/count_amd64.s
new file mode 100644
index 0000000..fa864c4
--- /dev/null
+++ b/src/internal/bytealg/count_amd64.s
@@ -0,0 +1,201 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Count(SB),NOSPLIT,$0-40
+	CMPB	internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
+	JEQ	2(PC)	// POPCNT flag set: skip the fallback jump
+	JMP	·countGeneric(SB)	// flag not set: use the Go implementation
+	MOVQ	b_base+0(FP), SI	// SI = data
+	MOVQ	b_len+8(FP), BX	// BX = data len
+	MOVB	c+24(FP), AL	// AL = byte sought
+	LEAQ	ret+32(FP), R8	// R8 = address to put result
+	JMP	countbody<>(SB)
+
+TEXT ·CountString(SB),NOSPLIT,$0-32
+	CMPB	internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
+	JEQ	2(PC)	// POPCNT flag set: skip the fallback jump
+	JMP	·countGenericString(SB)	// flag not set: use the Go implementation
+	MOVQ	s_base+0(FP), SI	// SI = data
+	MOVQ	s_len+8(FP), BX	// BX = data len
+	MOVB	c+16(FP), AL	// AL = byte sought
+	LEAQ	ret+24(FP), R8	// R8 = address to put result
+	JMP	countbody<>(SB)
+
+// Counts the bytes equal to AL in the data at SI. Input:
+//   SI: data
+//   BX: data len
+//   AL: byte sought
+//   R8: address to put result
+// This function requires the POPCNT instruction.
+TEXT countbody<>(SB),NOSPLIT,$0
+	// Shuffle X0 around so that each byte contains
+	// the character we're looking for.
+	MOVD AX, X0
+	PUNPCKLBW X0, X0
+	PUNPCKLBW X0, X0
+	PSHUFL $0, X0, X0
+
+	CMPQ BX, $16
+	JLT small
+
+	MOVQ $0, R12 // Accumulator
+
+	MOVQ SI, DI // DI = current read position
+
+	CMPQ BX, $32
+	JA avx2 // more than 32 bytes: try the AVX2 path
+sse:
+	LEAQ	-16(SI)(BX*1), AX	// AX = address of last 16 bytes
+	JMP	sseloopentry
+
+sseloop:
+	// Move the next 16-byte chunk of the data into X1.
+	MOVOU	(DI), X1
+	// Compare bytes in X0 to X1.
+	PCMPEQB	X0, X1
+	// Take the top bit of each byte in X1 and put the result in DX.
+	PMOVMSKB X1, DX
+	// Count number of matching bytes
+	POPCNTL DX, DX
+	// Accumulate into R12
+	ADDQ DX, R12
+	// Advance to next block.
+	ADDQ	$16, DI
+sseloopentry:
+	CMPQ	DI, AX
+	JBE	sseloop
+
+	// Get the number of bytes to consider in the last 16 bytes
+	ANDQ $15, BX
+	JZ end
+
+	// Create mask to ignore overlap between previous 16 byte block
+	// and the next.
+	MOVQ $16,CX
+	SUBQ BX, CX
+	MOVQ $0xFFFF, R10
+	SARQ CL, R10
+	SALQ CL, R10 // low CX bits cleared: only the top BX of the 16 mask bits remain
+
+	// Process the last 16-byte chunk. This chunk may overlap with the
+	// chunks we've already searched so we need to mask part of it.
+	MOVOU	(AX), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB X1, DX
+	// Apply mask
+	ANDQ R10, DX
+	POPCNTL DX, DX
+	ADDQ DX, R12
+end:
+	MOVQ R12, (R8)
+	RET
+
+// handle for lengths < 16
+small:
+	TESTQ	BX, BX
+	JEQ	endzero
+
+	// Check if we'll load across a page boundary.
+	LEAQ	16(SI), AX
+	TESTW	$0xff0, AX
+	JEQ	endofpage
+
+	// We must ignore high bytes as they aren't part of our slice.
+	// Create mask.
+	MOVB BX, CX
+	MOVQ $1, R10
+	SALQ CL, R10
+	SUBQ $1, R10 // R10 = (1 << BX) - 1: the low BX mask bits
+
+	// Load data
+	MOVOU	(SI), X1
+	// Compare target byte with each byte in data.
+	PCMPEQB	X0, X1
+	// Move result bits to integer register.
+	PMOVMSKB X1, DX
+	// Apply mask
+	ANDQ R10, DX
+	POPCNTL DX, DX
+	// Directly return DX, we don't need to accumulate
+	// since we have <16 bytes.
+	MOVQ	DX, (R8)
+	RET
+endzero:
+	MOVQ $0, (R8)
+	RET
+
+endofpage:
+	// We must ignore low bytes as they aren't part of our slice.
+	MOVQ $16,CX
+	SUBQ BX, CX
+	MOVQ $0xFFFF, R10
+	SARQ CL, R10
+	SALQ CL, R10 // low CX bits cleared: only the top BX of the 16 mask bits remain
+
+	// Load data into the high end of X1.
+	MOVOU	-16(SI)(BX*1), X1
+	// Compare target byte with each byte in data.
+	PCMPEQB	X0, X1
+	// Move result bits to integer register.
+	PMOVMSKB X1, DX
+	// Apply mask
+	ANDQ R10, DX
+	// Directly return DX, we don't need to accumulate
+	// since we have <16 bytes.
+	POPCNTL DX, DX
+	MOVQ	DX, (R8)
+	RET
+
+avx2:
+	CMPB   internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
+	JNE sse // AVX2 flag not set: use the SSE loop instead
+	MOVD AX, X0
+	LEAQ -32(SI)(BX*1), R11 // R11 = address of last 32 bytes
+	VPBROADCASTB  X0, Y1 // Y1 = byte sought in every lane
+avx2_loop:
+	VMOVDQU (DI), Y2
+	VPCMPEQB Y1, Y2, Y3
+	VPMOVMSKB Y3, DX
+	POPCNTL DX, DX
+	ADDQ DX, R12
+	ADDQ $32, DI
+	CMPQ DI, R11
+	JLE avx2_loop
+
+	// If last block is already processed,
+	// skip to the end.
+	CMPQ DI, R11
+	JEQ endavx // NOTE(review): the loop above only exits with DI > R11, so this looks unreachable; the zero mask below covers exact multiples of 32 - confirm
+
+	// Load address of the last 32 bytes.
+	// There is an overlap with the previous block.
+	MOVQ R11, DI
+	VMOVDQU (DI), Y2
+	VPCMPEQB Y1, Y2, Y3
+	VPMOVMSKB Y3, DX
+	// Exit AVX mode.
+	VZEROUPPER
+
+	// Create mask to ignore overlap between previous 32 byte block
+	// and the next.
+	ANDQ $31, BX
+	MOVQ $32,CX
+	SUBQ BX, CX
+	MOVQ $0xFFFFFFFF, R10
+	SARQ CL, R10
+	SALQ CL, R10 // low CX bits cleared: only the top BX of the 32 mask bits remain
+	// Apply mask
+	ANDQ R10, DX
+	POPCNTL DX, DX
+	ADDQ DX, R12
+	MOVQ R12, (R8)
+	RET
+endavx:
+	// Exit AVX mode.
+	VZEROUPPER
+	MOVQ R12, (R8)
+	RET
diff --git a/src/internal/bytealg/count_arm.s b/src/internal/bytealg/count_arm.s
new file mode 100644
index 0000000..f704ea0
--- /dev/null
+++ b/src/internal/bytealg/count_arm.s
@@ -0,0 +1,43 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Count(SB),NOSPLIT,$0-20
+	MOVW	b_base+0(FP), R0	// R0 = data
+	MOVW	b_len+4(FP), R1	// R1 = data length
+	MOVBU	c+12(FP), R2	// R2 = byte to find
+	MOVW	$ret+16(FP), R7	// R7 = address to put result
+	B	countbytebody<>(SB)	// tail jump: countbytebody stores the count through R7 and returns
+
+TEXT ·CountString(SB),NOSPLIT,$0-16
+	MOVW	s_base+0(FP), R0	// R0 = data
+	MOVW	s_len+4(FP), R1	// R1 = data length
+	MOVBU	c+8(FP), R2	// R2 = byte to find
+	MOVW	$ret+12(FP), R7	// R7 = address to put result
+	B	countbytebody<>(SB)	// tail jump: countbytebody stores the count through R7 and returns
+
+// Counts the bytes equal to R2 in the data at R0. Input:
+// R0: data
+// R1: data length
+// R2: byte to find
+// R7: address to put result
+//
+// On exit:
+// R4 and R8 are clobbered (R0 and R1 are modified as well)
+TEXT countbytebody<>(SB),NOSPLIT,$0
+	MOVW	$0, R8	// R8 = count of byte to search
+	CMP	$0, R1
+	B.EQ	done	// short path to handle 0-byte case
+	ADD	R0, R1	// R1 is the end of the range
+byte_loop:
+	MOVBU.P	1(R0), R4	// load one byte and advance R0
+	CMP	R4, R2
+	ADD.EQ	$1, R8	// count it when it equals the byte to find
+	CMP	R0, R1
+	B.NE	byte_loop	// loop until R0 reaches the end of the range
+done:
+	MOVW	R8, (R7)
+	RET
diff --git a/src/internal/bytealg/count_arm64.s b/src/internal/bytealg/count_arm64.s
new file mode 100644
index 0000000..8cd703d
--- /dev/null
+++ b/src/internal/bytealg/count_arm64.s
@@ -0,0 +1,90 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Count(SB),NOSPLIT,$0-40
+	MOVD	b_base+0(FP), R0	// R0 = data
+	MOVD	b_len+8(FP), R2	// R2 = data len
+	MOVBU	c+24(FP), R1	// R1 = byte to find
+	MOVD	$ret+32(FP), R8	// R8 = address to put result
+	B	countbytebody<>(SB)	// tail jump: countbytebody stores the count through R8 and returns
+
+TEXT ·CountString(SB),NOSPLIT,$0-32
+	MOVD	s_base+0(FP), R0	// R0 = data
+	MOVD	s_len+8(FP), R2	// R2 = data len
+	MOVBU	c+16(FP), R1	// R1 = byte to find
+	MOVD	$ret+24(FP), R8	// R8 = address to put result
+	B	countbytebody<>(SB)	// tail jump: countbytebody stores the count through R8 and returns
+
+// Counts the bytes equal to R1 in the data at R0. Input:
+//   R0: data
+//   R2: data len
+//   R1: byte to find
+//   R8: address to put result
+TEXT countbytebody<>(SB),NOSPLIT,$0
+	// R11 = count of byte to search
+	MOVD	$0, R11
+	// short path to handle 0-byte case
+	CBZ	R2, done
+	CMP	$0x20, R2
+	// jump directly to tail if length < 32
+	BLO	tail
+	ANDS	$0x1f, R0, R9
+	BEQ	chunk	// low five address bits are zero: already 32-byte aligned
+	// Work with not 32-byte aligned head
+	BIC	$0x1f, R0, R3
+	ADD	$0x20, R3	// R3 = next 32-byte boundary above R0
+head_loop:
+	MOVBU.P	1(R0), R5	// load one byte and advance R0
+	CMP	R5, R1
+	CINC	EQ, R11, R11	// count it when it equals the byte to find
+	SUB	$1, R2, R2
+	CMP	R0, R3
+	BNE	head_loop	// until R0 reaches the boundary
+	// Work with 32-byte aligned chunks
+chunk:
+	BIC	$0x1f, R2, R9	// R9 = remaining length with the low five bits cleared
+	// The first chunk can also be the last
+	CBZ	R9, tail
+	// R3 = end of 32-byte chunks
+	ADD	R0, R9, R3
+	MOVD	$1, R5
+	VMOV	R5, V5.B16	// V5 = 1 in every byte lane
+	// R2 = length of tail
+	SUB	R9, R2, R2
+	// Duplicate R1 (byte to search) to 16 1-byte elements of V0
+	VMOV	R1, V0.B16
+	// Clear the low 64-bit element of V7 and V8
+	VEOR	V7.B8, V7.B8, V7.B8
+	VEOR	V8.B8, V8.B8, V8.B8
+	// Count the target byte in 32-byte chunk
+chunk_loop:
+	VLD1.P	(R0), [V1.B16, V2.B16]
+	CMP	R0, R3	// end-of-chunks test; its flags are consumed by the BNE at the bottom of the loop
+	VCMEQ	V0.B16, V1.B16, V3.B16
+	VCMEQ	V0.B16, V2.B16, V4.B16
+	// Clear the higher 7 bits
+	VAND	V5.B16, V3.B16, V3.B16
+	VAND	V5.B16, V4.B16, V4.B16
+	// Count lanes match the requested byte
+	VADDP	V4.B16, V3.B16, V6.B16 // 32B->16B
+	VUADDLV	V6.B16, V7
+	// Accumulate the count in low 64-bit element of V8 when inside the loop
+	VADD	V7, V8
+	BNE	chunk_loop
+	VMOV	V8.D[0], R6
+	ADD	R6, R11, R11	// fold the vector total into the scalar count
+	CBZ	R2, done
+tail:
+	// Work with tail shorter than 32 bytes
+	MOVBU.P	1(R0), R5
+	SUB	$1, R2, R2
+	CMP	R5, R1
+	CINC	EQ, R11, R11
+	CBNZ	R2, tail
+done:
+	MOVD	R11, (R8)
+	RET
diff --git a/src/internal/bytealg/count_generic.go b/src/internal/bytealg/count_generic.go
new file mode 100644
index 0000000..1891d29
--- /dev/null
+++ b/src/internal/bytealg/count_generic.go
@@ -0,0 +1,28 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !arm && !arm64 && !ppc64le && !ppc64 && !riscv64 && !s390x
+// +build !amd64,!arm,!arm64,!ppc64le,!ppc64,!riscv64,!s390x
+
+package bytealg
+
+func Count(b []byte, c byte) int { // returns how many bytes of b are equal to c
+	n := 0 // running total of matches
+	for _, x := range b {
+		if x == c {
+			n++
+		}
+	}
+	return n
+}
+
+func CountString(s string, c byte) int { // returns how many bytes of s are equal to c
+	n := 0 // running total of matches
+	for i := 0; i < len(s); i++ { // indexes bytes, not runes
+		if s[i] == c {
+			n++
+		}
+	}
+	return n
+}
diff --git a/src/internal/bytealg/count_native.go b/src/internal/bytealg/count_native.go
new file mode 100644
index 0000000..a19a6f8
--- /dev/null
+++ b/src/internal/bytealg/count_native.go
@@ -0,0 +1,34 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm || arm64 || ppc64le || ppc64 || riscv64 || s390x
+// +build amd64 arm arm64 ppc64le ppc64 riscv64 s390x
+
+package bytealg
+
+//go:noescape
+func Count(b []byte, c byte) int // declaration only: ·Count is defined in the count_*.s assembly files
+
+//go:noescape
+func CountString(s string, c byte) int // declaration only: ·CountString is defined in the count_*.s assembly files
+
+// A backup implementation to use by assembly: counts the bytes of b equal to c.
+func countGeneric(b []byte, c byte) int {
+	n := 0 // running total of matches
+	for _, x := range b {
+		if x == c {
+			n++
+		}
+	}
+	return n
+}
+func countGenericString(s string, c byte) int { // string counterpart of countGeneric
+	n := 0 // running total of matches
+	for i := 0; i < len(s); i++ { // indexes bytes, not runes
+		if s[i] == c {
+			n++
+		}
+	}
+	return n
+}
diff --git a/src/internal/bytealg/count_ppc64x.s b/src/internal/bytealg/count_ppc64x.s
new file mode 100644
index 0000000..94163cb
--- /dev/null
+++ b/src/internal/bytealg/count_ppc64x.s
@@ -0,0 +1,98 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64le || ppc64
+// +build ppc64le ppc64
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40 // func Count(b []byte, c byte) int
+	MOVD  b_base+0(FP), R3    // R3 = byte array pointer
+	MOVD  b_len+8(FP), R4     // R4 = length
+	MOVBZ c+24(FP), R5        // R5 = byte to count
+	MOVD  $ret+32(FP), R14    // R14 = &ret
+	BR    countbytebody<>(SB) // branch (not a call): the shared body stores the count through R14 and returns
+
+TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32 // func CountString(s string, c byte) int
+	MOVD  s_base+0(FP), R3    // R3 = string data pointer
+	MOVD  s_len+8(FP), R4     // R4 = length
+	MOVBZ c+16(FP), R5        // R5 = byte to count
+	MOVD  $ret+24(FP), R14    // R14 = &ret
+	BR    countbytebody<>(SB) // branch (not a call): the shared body stores the count through R14 and returns
+
+// R3: addr of string (or byte slice data)
+// R4: len of string
+// R5: byte to count
+// R14: addr for return value
+// endianness shouldn't matter since we are just counting and order
+// is irrelevant
+TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
+	DCBT (R3)    // Prepare cache line.
+	MOVD R0, R18 // R18 = running byte count, initialised from R0
+	MOVD R3, R19 // Copy of the base address; R19 is not read again in this routine.
+	MOVD R4, R16 // R16 = bytes remaining
+
+	MOVD   R5, R6           // R6 starts as the single byte and is widened below
+	RLDIMI $8, R6, $48, R6
+	RLDIMI $16, R6, $32, R6
+	RLDIMI $32, R6, $0, R6  // fill reg with the byte to count
+
+	VSPLTISW $3, V4     // used for shift
+	MTVRD    R6, V1     // move compare byte
+	VSPLTB   $7, V1, V1 // replicate byte across V1
+
+	CMPU   R4, $32          // Check if it's a small string (<32 bytes)
+	BLT    tail             // Jump to the small string case
+	XXLXOR VS37, VS37, VS37 // clear V5 (aka VS37) to use as accumulator
+
+cmploop:
+	LXVW4X (R3), VS32 // load 16 bytes from string into VS32 (aka V0)
+
+	// when the bytes match, the corresponding byte contains all 1s
+	VCMPEQUB V1, V0, V2     // compare bytes
+	VPOPCNTD V2, V3         // each double word contains its count
+	VADDUDM  V3, V5, V5     // accumulate bit count in each double word
+	ADD      $16, R3, R3    // increment pointer
+	SUB      $16, R16, R16  // remaining bytes
+	CMP      R16, $16       // at least 16 remaining?
+	BGE      cmploop
+	VSRD     V5, V4, V5     // shift by 3 to convert bits to bytes
+	VSLDOI   $8, V5, V5, V6 // get the double word values from vector
+	MFVSRD   V5, R9         // R9, R10 = the two double word totals
+	MFVSRD   V6, R10
+	ADD      R9, R10, R9    // sum the two halves
+	ADD      R9, R18, R18   // fold the vector total into the byte count
+
+tail:
+	CMP R16, $8 // 8 bytes left?
+	BLT small
+
+	MOVD    (R3), R12     // load 8 bytes
+	CMPB    R12, R6, R17  // compare bytes
+	POPCNTD R17, R15      // bit count
+	SRD     $3, R15, R15  // byte count
+	ADD     R15, R18, R18 // add to byte count
+
+next1:
+	ADD $8, R3, R3   // advance past the 8 bytes just counted
+	SUB $8, R16, R16 // remaining bytes
+	BR  tail
+
+small:
+	CMP   $0, R16   // any remaining
+	BEQ   done
+	MOVBZ (R3), R12 // check each remaining byte
+	CMP   R12, R5
+	BNE   next2
+	ADD   $1, R18   // matched: bump the count
+
+next2:
+	SUB $1, R16 // one fewer byte remaining
+	ADD $1, R3  // inc address
+	BR  small
+
+done:
+	MOVD R18, (R14) // return count
+	RET
diff --git a/src/internal/bytealg/count_riscv64.s b/src/internal/bytealg/count_riscv64.s
new file mode 100644
index 0000000..3f4eb23
--- /dev/null
+++ b/src/internal/bytealg/count_riscv64.s
@@ -0,0 +1,44 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Count(SB),NOSPLIT,$0-40	// func Count(b []byte, c byte) int
+	MOV	b_base+0(FP), A1	// A1 = cursor, starts at the first byte
+	MOV	b_len+8(FP), A2	// A2 = length (turned into the end pointer below)
+	MOVBU	c+24(FP), A3	// byte to count
+	MOV	ZERO, A4	// count
+	ADD	A1, A2		// end
+
+loop:
+	BEQ	A1, A2, done	// cursor reached end
+	MOVBU	(A1), A5	// A5 = current byte
+	ADD	$1, A1
+	BNE	A3, A5, loop	// no match: go straight to the next byte
+	ADD	$1, A4	// match: bump the count
+	JMP	loop
+
+done:
+	MOV	A4, ret+32(FP)
+	RET
+
+TEXT ·CountString(SB),NOSPLIT,$0-32	// func CountString(s string, c byte) int
+	MOV	s_base+0(FP), A1	// A1 = cursor, starts at the first byte
+	MOV	s_len+8(FP), A2	// A2 = length (turned into the end pointer below)
+	MOVBU	c+16(FP), A3	// byte to count
+	MOV	ZERO, A4	// count
+	ADD	A1, A2		// end
+
+loop:
+	BEQ	A1, A2, done	// cursor reached end
+	MOVBU	(A1), A5	// A5 = current byte
+	ADD	$1, A1
+	BNE	A3, A5, loop	// no match: go straight to the next byte
+	ADD	$1, A4	// match: bump the count
+	JMP	loop
+
+done:
+	MOV	A4, ret+24(FP)
+	RET
diff --git a/src/internal/bytealg/count_s390x.s b/src/internal/bytealg/count_s390x.s
new file mode 100644
index 0000000..2a3b5c0
--- /dev/null
+++ b/src/internal/bytealg/count_s390x.s
@@ -0,0 +1,169 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// condition code masks (passed to CGIJ, CIJ and LOCGR below)
+#define EQ 8
+#define NE 7
+
+// register assignments (general-purpose registers used by countbytebody)
+#define R_ZERO R0
+#define R_VAL  R1
+#define R_TMP  R2
+#define R_PTR  R3
+#define R_LEN  R4
+#define R_CHAR R5
+#define R_RET  R6
+#define R_ITER R7
+#define R_CNT  R8
+#define R_MPTR R9
+
+// vector register assignments (used only on the vector-facility path)
+#define V_ZERO V0
+#define V_CHAR V1
+#define V_MASK V2
+#define V_VAL  V3
+#define V_CNT  V4
+
+// mask for trailing bytes in vector implementation: 16 bytes of 0x01, loaded with the same length as the trailing input
+GLOBL countbytemask<>(SB), RODATA, $16
+DATA countbytemask<>+0(SB)/8, $0x0101010101010101
+DATA countbytemask<>+8(SB)/8, $0x0101010101010101
+
+// func Count(b []byte, c byte) int
+TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40
+	LMG   b+0(FP), R_PTR, R_LEN // load b's data pointer and length into R_PTR and R_LEN
+	MOVBZ c+24(FP), R_CHAR      // byte to count
+	MOVD  $ret+32(FP), R_RET    // address of the result slot
+	BR    countbytebody<>(SB)   // branch (not a call): the shared body writes the result and returns
+
+// func CountString(s string, c byte) int
+TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32
+	LMG   s+0(FP), R_PTR, R_LEN // load s's data pointer and length into R_PTR and R_LEN
+	MOVBZ c+16(FP), R_CHAR      // byte to count
+	MOVD  $ret+24(FP), R_RET    // address of the result slot
+	BR    countbytebody<>(SB)   // branch (not a call): the shared body writes the result and returns
+
+// input:
+// R_PTR  = address of array of bytes
+// R_LEN  = number of bytes in array
+// R_CHAR = byte value to count (zero-extended to register width)
+// R_RET  = address of return value
+TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0
+	MOVD  $internal∕cpu·S390X+const_offsetS390xHasVX(SB), R_TMP // R_TMP = address of the HasVX flag
+	MOVD  $countbytemask<>(SB), R_MPTR // R_MPTR = address of the 0x01-per-byte mask
+	CGIJ  $EQ, R_LEN, $0, ret0 // return if length is 0.
+	SRD   $4, R_LEN, R_ITER    // R_ITER is the number of 16-byte chunks
+	MOVBZ (R_TMP), R_TMP       // load bool indicating support for vector facility
+	CGIJ  $EQ, R_TMP, $0, novx // jump to scalar code if the vector facility is not available
+
+	// Start of vector code (have vector facility).
+	//
+	// Set R_LEN to be the length mod 16 minus 1 to use as an index for
+	// vector 'load with length' (VLL). It will be in the range [-1,14].
+	// Also replicate c across a 16-byte vector and initialize V_ZERO.
+	ANDW  $0xf, R_LEN
+	VLVGB $0, R_CHAR, V_CHAR // V_CHAR = [16]byte{c, 0, ..., 0, 0}
+	VZERO V_ZERO             // V_ZERO = [1]uint128{0}
+	ADDW  $-1, R_LEN
+	VREPB $0, V_CHAR, V_CHAR // V_CHAR = [16]byte{c, c, ..., c, c}
+
+	// Jump to loop if we have more than 15 bytes to process.
+	CGIJ $NE, R_ITER, $0, vxchunks
+
+	// Load 1-15 bytes and corresponding mask.
+	// Note: only the low 32-bits of R_LEN are used for the index.
+	VLL R_LEN, (R_PTR), V_VAL
+	VLL R_LEN, (R_MPTR), V_MASK
+
+	// Compare each byte in input chunk against byte to be counted.
+	// Each byte element will be set to either 0 (no match) or 1 (match).
+	VCEQB V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
+	VN    V_MASK, V_VAL, V_VAL // mask out most significant 7 bits
+
+	// Accumulate matched byte count in 128-bit integer value.
+	VSUMB  V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} → [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
+	VSUMQF V_VAL, V_ZERO, V_CNT // [4]uint32{x0, x1, x2, x3} → [1]uint128{x0+x1+x2+x3}
+
+	// Return rightmost (lowest) 64-bit part of accumulator.
+	VSTEG $1, V_CNT, (R_RET)
+	RET
+
+vxchunks:
+	// Load 0x01 into every byte element in the 16-byte mask vector.
+	VREPIB $1, V_MASK // V_MASK = [16]byte{1, 1, ..., 1, 1}
+	VZERO  V_CNT      // initial uint128 count of 0
+
+vxloop:
+	// Load input bytes in 16-byte chunks.
+	VL (R_PTR), V_VAL
+
+	// Compare each byte in input chunk against byte to be counted.
+	// Each byte element will be set to either 0 (no match) or 1 (match).
+	VCEQB V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
+	VN    V_MASK, V_VAL, V_VAL // mask out most significant 7 bits
+
+	// Increment input string address.
+	MOVD $16(R_PTR), R_PTR
+
+	// Accumulate matched byte count in 128-bit integer value.
+	VSUMB  V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} → [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
+	VSUMQF V_VAL, V_ZERO, V_VAL // [4]uint32{x0, x1, x2, x3} → [1]uint128{x0+x1+x2+x3}
+	VAQ    V_VAL, V_CNT, V_CNT  // accumulate
+
+	// Repeat until all 16-byte chunks are done.
+	BRCTG R_ITER, vxloop
+
+	// Skip to end if there are no trailing bytes.
+	CIJ $EQ, R_LEN, $-1, vxret
+
+	// Load 1-15 bytes and corresponding mask.
+	// Note: only the low 32-bits of R_LEN are used for the index.
+	VLL R_LEN, (R_PTR), V_VAL
+	VLL R_LEN, (R_MPTR), V_MASK
+
+	// Compare each byte in input chunk against byte to be counted.
+	// Each byte element will be set to either 0 (no match) or 1 (match).
+	VCEQB V_CHAR, V_VAL, V_VAL // each byte will be either 0xff or 0x00
+	VN    V_MASK, V_VAL, V_VAL // mask out most significant 7 bits
+
+	// Accumulate matched byte count in 128-bit integer value.
+	VSUMB  V_VAL, V_ZERO, V_VAL // [16]byte{x0, x1, ..., x14, x15} → [4]uint32{x0+x1+x2+x3, ..., x12+x13+x14+x15}
+	VSUMQF V_VAL, V_ZERO, V_VAL // [4]uint32{x0, x1, x2, x3} → [1]uint128{x0+x1+x2+x3}
+	VAQ    V_VAL, V_CNT, V_CNT  // accumulate
+
+vxret:
+	// Return rightmost (lowest) 64-bit part of accumulator.
+	VSTEG $1, V_CNT, (R_RET)
+	RET
+
+novx:
+	// Start of non-vector code (the vector facility not available).
+	//
+	// Initialise counter and constant zero.
+	MOVD $0, R_CNT
+	MOVD $0, R_ZERO
+
+loop:
+	// Read 1-byte from input and compare.
+	// Note: avoid putting LOCGR in critical path.
+	MOVBZ (R_PTR), R_VAL
+	MOVD  $1, R_TMP          // assume a match; cleared by LOCGR below otherwise
+	MOVD  $1(R_PTR), R_PTR   // advance to the next input byte
+	CMPW  R_VAL, R_CHAR
+	LOCGR $NE, R_ZERO, R_TMP // select 0 if no match (1 if there is a match)
+	ADD   R_TMP, R_CNT       // accumulate 64-bit result
+
+	// Repeat until all bytes have been checked.
+	BRCTG R_LEN, loop // R_LEN still holds the full length here: the ANDW above is only on the vector path
+
+ret:
+	MOVD R_CNT, (R_RET) // store the scalar-path count
+	RET
+
+ret0:
+	MOVD $0, (R_RET) // empty input: the count is 0
+	RET
diff --git a/src/internal/bytealg/equal_386.s b/src/internal/bytealg/equal_386.s
new file mode 100644
index 0000000..8723363
--- /dev/null
+++ b/src/internal/bytealg/equal_386.s
@@ -0,0 +1,129 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-13
+	MOVL	a+0(FP), SI
+	MOVL	b+4(FP), DI
+	CMPL	SI, DI
+	JEQ	eq	// identical pointers: equal without reading memory
+	MOVL	size+8(FP), BX
+	LEAL	ret+12(FP), AX	// AX = address of the result byte
+	JMP	memeqbody<>(SB)	// memeqbody writes the result through AX and returns
+eq:
+	MOVB    $1, ret+12(FP)
+	RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
+	MOVL    a+0(FP), SI
+	MOVL    b+4(FP), DI
+	CMPL    SI, DI
+	JEQ     eq	// identical pointers: equal without reading memory
+	MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
+	LEAL	ret+8(FP), AX	// AX = address of the result byte
+	JMP	memeqbody<>(SB)	// memeqbody writes the result through AX and returns
+eq:
+	MOVB    $1, ret+8(FP)
+	RET
+
+// a in SI
+// b in DI
+// count in BX
+// address of result byte in AX
+TEXT memeqbody<>(SB),NOSPLIT,$0-0
+	CMPL	BX, $4
+	JB	small	// fewer than 4 bytes: handled separately below
+
+	// 64 bytes at a time using xmm registers
+hugeloop:
+	CMPL	BX, $64
+	JB	bigloop
+	CMPB	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
+	JNE	bigloop	// no SSE2: use the 4-byte loop instead
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	MOVOU	16(SI), X2
+	MOVOU	16(DI), X3
+	MOVOU	32(SI), X4
+	MOVOU	32(DI), X5
+	MOVOU	48(SI), X6
+	MOVOU	48(DI), X7
+	PCMPEQB	X1, X0
+	PCMPEQB	X3, X2
+	PCMPEQB	X5, X4
+	PCMPEQB	X7, X6
+	PAND	X2, X0
+	PAND	X6, X4
+	PAND	X4, X0	// X0 = combined per-byte match result of all four 16-byte pairs
+	PMOVMSKB X0, DX
+	ADDL	$64, SI
+	ADDL	$64, DI
+	SUBL	$64, BX
+	CMPL	DX, $0xffff	// all 16 mask bits set means every byte matched
+	JEQ	hugeloop
+	MOVB	$0, (AX)
+	RET
+
+	// 4 bytes at a time using 32-bit register
+bigloop:
+	CMPL	BX, $4
+	JBE	leftover
+	MOVL	(SI), CX
+	MOVL	(DI), DX
+	ADDL	$4, SI
+	ADDL	$4, DI
+	SUBL	$4, BX
+	CMPL	CX, DX
+	JEQ	bigloop
+	MOVB	$0, (AX)
+	RET
+
+	// remaining 0-4 bytes
+leftover:
+	MOVL	-4(SI)(BX*1), CX	// the 4 bytes ending at the end of a (may overlap bytes already compared)
+	MOVL	-4(DI)(BX*1), DX	// the 4 bytes ending at the end of b
+	CMPL	CX, DX
+	SETEQ	(AX)
+	RET
+
+small:
+	CMPL	BX, $0
+	JEQ	equal	// zero length: the flags from the CMPL above make SETEQ store 1
+
+	LEAL	0(BX*8), CX	// CX = count in bits
+	NEGL	CX	// CX = -(count in bits), used as the shift amount below
+
+	MOVL	SI, DX
+	CMPB	DX, $0xfc	// inspect the low byte of the address
+	JA	si_high
+
+	// load at SI won't cross a page boundary.
+	MOVL	(SI), SI
+	JMP	si_finish
+si_high:
+	// address ends in 111111xx. Load up to bytes we want, move to correct position.
+	MOVL	-4(SI)(BX*1), SI
+	SHRL	CX, SI
+si_finish:
+
+	// same for DI.
+	MOVL	DI, DX
+	CMPB	DX, $0xfc
+	JA	di_high
+	MOVL	(DI), DI
+	JMP	di_finish
+di_high:
+	MOVL	-4(DI)(BX*1), DI
+	SHRL	CX, DI
+di_finish:
+
+	SUBL	SI, DI	// DI = difference of the two loaded words
+	SHLL	CX, DI	// shift out the bytes beyond count; SETEQ below tests the result
+equal:
+	SETEQ	(AX)
+	RET
diff --git a/src/internal/bytealg/equal_amd64.s b/src/internal/bytealg/equal_amd64.s
new file mode 100644
index 0000000..6f12d2a
--- /dev/null
+++ b/src/internal/bytealg/equal_amd64.s
@@ -0,0 +1,210 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
+#ifdef GOEXPERIMENT_regabiargs
+	// AX = a    (want in SI)
+	// BX = b    (want in DI)
+	// CX = size (want in BX)
+	CMPQ	AX, BX	// identical pointers are equal without reading memory
+	JNE	neq
+	MOVQ	$1, AX	// return 1
+	RET
+neq:
+	MOVQ	AX, SI
+	MOVQ	BX, DI
+	MOVQ	CX, BX	// must follow the copy of b out of BX
+	JMP	memeqbody<>(SB)
+#else
+	MOVQ	a+0(FP), SI
+	MOVQ	b+8(FP), DI
+	CMPQ	SI, DI
+	JEQ	eq	// identical pointers are equal without reading memory
+	MOVQ	size+16(FP), BX
+	LEAQ	ret+24(FP), AX	// AX = address of the result byte
+	JMP	memeqbody<>(SB)
+eq:
+	MOVB	$1, ret+24(FP)
+	RET
+#endif
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
+#ifdef GOEXPERIMENT_regabiargs
+	// AX = a       (want in SI)
+	// BX = b       (want in DI)
+	// 8(DX) = size (want in BX)
+	CMPQ	AX, BX	// identical pointers are equal without reading memory
+	JNE	neq
+	MOVQ	$1, AX	// return 1
+	RET
+neq:
+	MOVQ	AX, SI
+	MOVQ	BX, DI
+	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
+	JMP	memeqbody<>(SB)
+#else
+	MOVQ	a+0(FP), SI
+	MOVQ	b+8(FP), DI
+	CMPQ	SI, DI
+	JEQ	eq	// identical pointers are equal without reading memory
+	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
+	LEAQ	ret+16(FP), AX	// AX = address of the result byte
+	JMP	memeqbody<>(SB)
+eq:
+	MOVB	$1, ret+16(FP)
+	RET
+#endif
+
+// Input:
+//   a in SI
+//   b in DI
+//   count in BX
+#ifndef GOEXPERIMENT_regabiargs
+//   address of result byte in AX
+#else
+// Output:
+//   result in AX
+#endif
+TEXT memeqbody<>(SB),NOSPLIT,$0-0
+	CMPQ	BX, $8
+	JB	small	// fewer than 8 bytes: handled separately below
+	CMPQ	BX, $64
+	JB	bigloop
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
+	JE	hugeloop_avx2	// use the ymm loop when AVX2 is reported available
+
+	// 64 bytes at a time using xmm registers
+hugeloop:
+	CMPQ	BX, $64
+	JB	bigloop
+	MOVOU	(SI), X0
+	MOVOU	(DI), X1
+	MOVOU	16(SI), X2
+	MOVOU	16(DI), X3
+	MOVOU	32(SI), X4
+	MOVOU	32(DI), X5
+	MOVOU	48(SI), X6
+	MOVOU	48(DI), X7
+	PCMPEQB	X1, X0
+	PCMPEQB	X3, X2
+	PCMPEQB	X5, X4
+	PCMPEQB	X7, X6
+	PAND	X2, X0
+	PAND	X6, X4
+	PAND	X4, X0	// X0 = combined per-byte match result of all four 16-byte pairs
+	PMOVMSKB X0, DX
+	ADDQ	$64, SI
+	ADDQ	$64, DI
+	SUBQ	$64, BX
+	CMPL	DX, $0xffff	// all 16 mask bits set means every byte matched
+	JEQ	hugeloop
+#ifdef GOEXPERIMENT_regabiargs
+	XORQ	AX, AX	// return 0
+#else
+	MOVB	$0, (AX)
+#endif
+	RET
+
+	// 64 bytes at a time using ymm registers
+hugeloop_avx2:
+	CMPQ	BX, $64
+	JB	bigloop_avx2
+	VMOVDQU	(SI), Y0
+	VMOVDQU	(DI), Y1
+	VMOVDQU	32(SI), Y2
+	VMOVDQU	32(DI), Y3
+	VPCMPEQB	Y1, Y0, Y4
+	VPCMPEQB	Y2, Y3, Y5
+	VPAND	Y4, Y5, Y6	// Y6 = combined per-byte match result of both 32-byte pairs
+	VPMOVMSKB Y6, DX
+	ADDQ	$64, SI
+	ADDQ	$64, DI
+	SUBQ	$64, BX
+	CMPL	DX, $0xffffffff	// all 32 mask bits set means every byte matched
+	JEQ	hugeloop_avx2
+	VZEROUPPER
+#ifdef GOEXPERIMENT_regabiargs
+	XORQ	AX, AX	// return 0
+#else
+	MOVB	$0, (AX)
+#endif
+	RET
+
+bigloop_avx2:
+	VZEROUPPER
+
+	// 8 bytes at a time using 64-bit register
+bigloop:
+	CMPQ	BX, $8
+	JBE	leftover
+	MOVQ	(SI), CX
+	MOVQ	(DI), DX
+	ADDQ	$8, SI
+	ADDQ	$8, DI
+	SUBQ	$8, BX
+	CMPQ	CX, DX
+	JEQ	bigloop
+#ifdef GOEXPERIMENT_regabiargs
+	XORQ	AX, AX	// return 0
+#else
+	MOVB	$0, (AX)
+#endif
+	RET
+
+	// remaining 0-8 bytes
+leftover:
+	MOVQ	-8(SI)(BX*1), CX	// the 8 bytes ending at the end of a (may overlap bytes already compared)
+	MOVQ	-8(DI)(BX*1), DX	// the 8 bytes ending at the end of b
+	CMPQ	CX, DX
+#ifdef GOEXPERIMENT_regabiargs
+	SETEQ	AX
+#else
+	SETEQ	(AX)
+#endif
+	RET
+
+small:
+	CMPQ	BX, $0
+	JEQ	equal	// zero length: the flags from the CMPQ above make SETEQ produce 1
+
+	LEAQ	0(BX*8), CX	// CX = count in bits
+	NEGQ	CX	// CX = -(count in bits), used as the shift amount below
+
+	CMPB	SI, $0xf8	// inspect the low byte of the address
+	JA	si_high
+
+	// load at SI won't cross a page boundary.
+	MOVQ	(SI), SI
+	JMP	si_finish
+si_high:
+	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
+	MOVQ	-8(SI)(BX*1), SI
+	SHRQ	CX, SI
+si_finish:
+
+	// same for DI.
+	CMPB	DI, $0xf8
+	JA	di_high
+	MOVQ	(DI), DI
+	JMP	di_finish
+di_high:
+	MOVQ	-8(DI)(BX*1), DI
+	SHRQ	CX, DI
+di_finish:
+
+	SUBQ	SI, DI	// DI = difference of the two loaded words
+	SHLQ	CX, DI	// shift out the bytes beyond count; SETEQ below tests the result
+equal:
+#ifdef GOEXPERIMENT_regabiargs
+	SETEQ	AX
+#else
+	SETEQ	(AX)
+#endif
+	RET
+
diff --git a/src/internal/bytealg/equal_arm.s b/src/internal/bytealg/equal_arm.s
new file mode 100644
index 0000000..a6c4369
--- /dev/null
+++ b/src/internal/bytealg/equal_arm.s
@@ -0,0 +1,91 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-13
+	MOVW	a+0(FP), R0
+	MOVW	b+4(FP), R2
+	CMP	R0, R2
+	B.EQ	eq		// identical pointers: equal without reading memory
+	MOVW	size+8(FP), R1
+	CMP	$0, R1
+	B.EQ	eq		// short path to handle 0-byte case
+	MOVW	$ret+12(FP), R7	// R7 = address of the result byte
+	B	memeqbody<>(SB)
+eq:
+	MOVW	$1, R0
+	MOVB	R0, ret+12(FP)
+	RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-9
+	MOVW	a+0(FP), R0
+	MOVW	b+4(FP), R2
+	CMP	R0, R2
+	B.EQ	eq		// identical pointers: equal without reading memory
+	MOVW	4(R7), R1	// compiler stores size at offset 4 in the closure
+	CMP	$0, R1
+	B.EQ	eq		// short path to handle 0-byte case
+	MOVW	$ret+8(FP), R7	// R7 (the closure pointer until now) is reused for the result address
+	B	memeqbody<>(SB)
+eq:
+	MOVW	$1, R0
+	MOVB	R0, ret+8(FP)
+	RET
+
+// Input:
+// R0: data of a
+// R1: length
+// R2: data of b
+// R7: points to return value
+//
+// On exit:
+// R4, R5 and R6 are clobbered
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	CMP	$1, R1
+	B.EQ	one		// 1-byte special case for better performance
+
+	CMP	$4, R1
+	ADD	R0, R1		// R1 is the end of the range to compare
+	B.LT	byte_loop	// length < 4
+	AND	$3, R0, R6
+	CMP	$0, R6
+	B.NE	byte_loop	// unaligned a, use byte-wise compare (TODO: try to align a)
+	AND	$3, R2, R6
+	CMP	$0, R6
+	B.NE	byte_loop	// unaligned b, use byte-wise compare
+	AND	$0xfffffffc, R1, R6	// R6 = end address with its low two bits cleared
+	// length >= 4
+chunk4_loop:
+	MOVW.P	4(R0), R4	// load a word from each side, post-incrementing the pointers
+	MOVW.P	4(R2), R5
+	CMP	R4, R5
+	B.NE	notequal
+	CMP	R0, R6
+	B.NE	chunk4_loop	// more whole words remain
+	CMP	R0, R1
+	B.EQ	equal		// reached the end
+byte_loop:
+	MOVBU.P	1(R0), R4	// load a byte from each side, post-incrementing the pointers
+	MOVBU.P	1(R2), R5
+	CMP	R4, R5
+	B.NE	notequal
+	CMP	R0, R1
+	B.NE	byte_loop
+equal:
+	MOVW	$1, R0
+	MOVB	R0, (R7)
+	RET
+one:
+	MOVBU	(R0), R4
+	MOVBU	(R2), R5
+	CMP	R4, R5
+	B.EQ	equal
+notequal:
+	MOVW	$0, R0
+	MOVB	R0, (R7)
+	RET
diff --git a/src/internal/bytealg/equal_arm64.s b/src/internal/bytealg/equal_arm64.s
new file mode 100644
index 0000000..01aa7b7
--- /dev/null
+++ b/src/internal/bytealg/equal_arm64.s
@@ -0,0 +1,136 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	MOVD	size+16(FP), R1
+	// short path to handle 0-byte case
+	CBZ	R1, equal
+	MOVD	a+0(FP), R0
+	MOVD	b+8(FP), R2
+	MOVD	$ret+24(FP), R8	// R8 = address of the result byte
+	B	memeqbody<>(SB)	// memeqbody writes the result through R8 and returns
+equal:
+	MOVD	$1, R0
+	MOVB	R0, ret+24(FP)
+	RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+	MOVD	a+0(FP), R3
+	MOVD	b+8(FP), R4
+	CMP	R3, R4
+	BEQ	eq	// identical pointers: equal without reading memory
+	MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
+	CBZ	R5, eq	// zero size: equal
+	MOVD	R3, 8(RSP)	// outgoing arguments a, b, size for runtime·memequal
+	MOVD	R4, 16(RSP)
+	MOVD	R5, 24(RSP)
+	BL	runtime·memequal(SB)
+	MOVBU	32(RSP), R3	// result byte written by the callee
+	MOVB	R3, ret+16(FP)
+	RET
+eq:
+	MOVD	$1, R3
+	MOVB	R3, ret+16(FP)
+	RET
+
+// input:
+// R0: pointer a
+// R1: data len
+// R2: pointer b
+// R8: address to put result
+TEXT memeqbody<>(SB),NOSPLIT,$0
+	CMP	$1, R1
+	// handle 1-byte special case for better performance
+	BEQ	one
+	CMP	$16, R1
+	// handle specially if length < 16
+	BLO	tail
+	BIC	$0x3f, R1, R3	// R3 = length with the low 6 bits cleared (bytes covered by whole 64-byte chunks)
+	CBZ	R3, chunk16
+	// work with 64-byte chunks
+	ADD	R3, R0, R6	// end of chunks
+chunk64_loop:
+	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
+	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
+	VCMEQ	V0.D2, V4.D2, V8.D2
+	VCMEQ	V1.D2, V5.D2, V9.D2
+	VCMEQ	V2.D2, V6.D2, V10.D2
+	VCMEQ	V3.D2, V7.D2, V11.D2
+	VAND	V8.B16, V9.B16, V8.B16
+	VAND	V8.B16, V10.B16, V8.B16
+	VAND	V8.B16, V11.B16, V8.B16	// V8 = AND of all four comparison results
+	CMP	R0, R6	// sets the flags consumed by the BNE below
+	VMOV	V8.D[0], R4
+	VMOV	V8.D[1], R5
+	CBZ	R4, not_equal
+	CBZ	R5, not_equal
+	BNE	chunk64_loop	// not yet at the end of the 64-byte chunks
+	AND	$0x3f, R1, R1	// R1 = bytes left after the 64-byte chunks
+	CBZ	R1, equal
+chunk16:
+	// work with 16-byte chunks
+	BIC	$0xf, R1, R3	// R3 = length with the low 4 bits cleared (bytes covered by whole 16-byte chunks)
+	CBZ	R3, tail
+	ADD	R3, R0, R6	// end of chunks
+chunk16_loop:
+	LDP.P	16(R0), (R4, R5)
+	LDP.P	16(R2), (R7, R9)
+	EOR	R4, R7	// XOR is non-zero iff the two words differ
+	CBNZ	R7, not_equal
+	EOR	R5, R9
+	CBNZ	R9, not_equal
+	CMP	R0, R6
+	BNE	chunk16_loop
+	AND	$0xf, R1, R1	// R1 = bytes left after the 16-byte chunks
+	CBZ	R1, equal
+tail:
+	// special compare of tail with length < 16
+	TBZ	$3, R1, lt_8	// bit 3 clear: fewer than 8 bytes remain
+	MOVD	(R0), R4
+	MOVD	(R2), R5
+	EOR	R4, R5
+	CBNZ	R5, not_equal
+	SUB	$8, R1, R6	// offset of the last 8 bytes
+	MOVD	(R0)(R6), R4
+	MOVD	(R2)(R6), R5
+	EOR	R4, R5
+	CBNZ	R5, not_equal
+	B	equal
+lt_8:
+	TBZ	$2, R1, lt_4	// bit 2 clear: fewer than 4 bytes remain
+	MOVWU	(R0), R4
+	MOVWU	(R2), R5
+	EOR	R4, R5
+	CBNZ	R5, not_equal
+	SUB	$4, R1, R6	// offset of the last 4 bytes
+	MOVWU	(R0)(R6), R4
+	MOVWU	(R2)(R6), R5
+	EOR	R4, R5
+	CBNZ	R5, not_equal
+	B	equal
+lt_4:
+	TBZ	$1, R1, lt_2	// bit 1 clear: no 2-byte piece to compare
+	MOVHU.P	2(R0), R4	// compare 2 bytes, post-incrementing both pointers
+	MOVHU.P	2(R2), R5
+	CMP	R4, R5
+	BNE	not_equal
+lt_2:
+	TBZ	$0, R1, equal	// bit 0 clear: no final byte to compare
+one:
+	MOVBU	(R0), R4
+	MOVBU	(R2), R5
+	CMP	R4, R5
+	BNE	not_equal
+equal:
+	MOVD	$1, R0
+	MOVB	R0, (R8)
+	RET
+not_equal:
+	MOVB	ZR, (R8)
+	RET
diff --git a/src/internal/bytealg/equal_generic.go b/src/internal/bytealg/equal_generic.go
new file mode 100644
index 0000000..59bdf8f
--- /dev/null
+++ b/src/internal/bytealg/equal_generic.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+// Equal reports whether a and b
+// are the same length and contain the same bytes.
+// A nil argument is equivalent to an empty slice.
+//
+// Equal is equivalent to bytes.Equal.
+// It is provided here for convenience,
+// because some packages cannot depend on bytes.
+func Equal(a, b []byte) bool {
+	// Neither cmd/compile nor gccgo allocates for these string conversions.
+	// There is a test for this in package bytes.
+	return string(a) == string(b) // string equality covers both the length and the content check
+}
diff --git a/src/internal/bytealg/equal_mips64x.s b/src/internal/bytealg/equal_mips64x.s
new file mode 100644
index 0000000..c2f7d39
--- /dev/null
+++ b/src/internal/bytealg/equal_mips64x.s
@@ -0,0 +1,119 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+#define	REGCTXT	R22
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	MOVV	a+0(FP), R1
+	MOVV	b+8(FP), R2
+	BEQ	R1, R2, eq	// identical pointers: equal without reading memory
+	MOVV	size+16(FP), R3
+	ADDV	R1, R3, R4	// R4 = end of a, used by the byte loop
+
+	// chunk size is 16
+	SGTU	$16, R3, R8	// R8 is non-zero when size is below 16
+	BEQ	R0, R8, chunk_entry	// size >= 16: try the chunked path
+
+byte_loop:
+	BNE	R1, R4, byte_test
+	MOVV	$1, R1	// reached the end with no mismatch
+	MOVB	R1, ret+24(FP)
+	RET
+byte_test:
+	MOVBU	(R1), R6
+	ADDV	$1, R1
+	MOVBU	(R2), R7
+	ADDV	$1, R2
+	BEQ	R6, R7, byte_loop
+	JMP	not_eq
+
+chunk_entry:
+	// make sure both a and b are aligned
+	OR	R1, R2, R9
+	AND	$0x7, R9
+	BNE	R0, R9, byte_loop	// either pointer not 8-byte aligned: use the byte loop
+	JMP	chunk_loop_1
+
+chunk_loop:
+	// chunk size is 16
+	SGTU	$16, R3, R8
+	BNE	R0, R8, chunk_tail_8	// fewer than 16 bytes left
+chunk_loop_1:
+	MOVV	(R1), R6
+	MOVV	(R2), R7
+	BNE	R6, R7, not_eq
+	MOVV	8(R1), R12
+	MOVV	8(R2), R13
+	ADDV	$16, R1
+	ADDV	$16, R2
+	SUBV	$16, R3	// R3 = bytes remaining
+	BEQ	R12, R13, chunk_loop
+	JMP	not_eq
+
+chunk_tail_8:
+	AND	$8, R3, R14	// test bit 3 of the remaining length
+	BEQ	R0, R14, chunk_tail_4
+	MOVV	(R1), R6
+	MOVV	(R2), R7
+	BNE	R6, R7, not_eq
+	ADDV	$8, R1
+	ADDV	$8, R2
+
+chunk_tail_4:
+	AND	$4, R3, R14	// test bit 2 of the remaining length
+	BEQ	R0, R14, chunk_tail_2
+	MOVWU	(R1), R6
+	MOVWU	(R2), R7
+	BNE	R6, R7, not_eq
+	ADDV	$4, R1
+	ADDV	$4, R2
+
+chunk_tail_2:
+	AND	$2, R3, R14	// test bit 1 of the remaining length
+	BEQ	R0, R14, chunk_tail_1
+	MOVHU	(R1), R6
+	MOVHU	(R2), R7
+	BNE	R6, R7, not_eq
+	ADDV	$2, R1
+	ADDV	$2, R2
+
+chunk_tail_1:
+	AND	$1, R3, R14	// test bit 0 of the remaining length
+	BEQ	R0, R14, eq
+	MOVBU	(R1), R6
+	MOVBU	(R2), R7
+	BEQ	R6, R7, eq
+
+not_eq:
+	MOVB	R0, ret+24(FP)	// store 0 (false)
+	RET
+eq:
+	MOVV	$1, R1
+	MOVB	R1, ret+24(FP)
+	RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+	MOVV	a+0(FP), R1
+	MOVV	b+8(FP), R2
+	BEQ	R1, R2, eq	// identical pointers: equal without reading memory
+	MOVV	8(REGCTXT), R3    // compiler stores size at offset 8 in the closure
+	MOVV	R1, 8(R29)	// outgoing arguments a, b, size for runtime·memequal
+	MOVV	R2, 16(R29)
+	MOVV	R3, 24(R29)
+	JAL	runtime·memequal(SB)
+	MOVBU	32(R29), R1	// result byte written by the callee
+	MOVB	R1, ret+16(FP)
+	RET
+eq:
+	MOVV	$1, R1
+	MOVB	R1, ret+16(FP)
+	RET
diff --git a/src/internal/bytealg/equal_mipsx.s b/src/internal/bytealg/equal_mipsx.s
new file mode 100644
index 0000000..11e5549
--- /dev/null
+++ b/src/internal/bytealg/equal_mipsx.s
@@ -0,0 +1,63 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+// +build mips mipsle
+
+#include "go_asm.h"
+#include "textflag.h"
+
+#define	REGCTXT	R22
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-13
+	MOVW	a+0(FP), R1
+	MOVW	b+4(FP), R2
+	BEQ	R1, R2, eq	// identical pointers: equal without reading memory
+	MOVW	size+8(FP), R3
+	ADDU	R1, R3, R4	// R4 = end of a
+loop:
+	BNE	R1, R4, test
+	MOVW	$1, R1	// reached the end with no mismatch
+	MOVB	R1, ret+12(FP)
+	RET
+test:
+	MOVBU	(R1), R6
+	ADDU	$1, R1
+	MOVBU	(R2), R7
+	ADDU	$1, R2
+	BEQ	R6, R7, loop
+
+	MOVB	R0, ret+12(FP)	// mismatch: store 0 (false)
+	RET
+eq:
+	MOVW	$1, R1
+	MOVB	R1, ret+12(FP)
+	RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
+	MOVW	a+0(FP), R1
+	MOVW	b+4(FP), R2
+	BEQ	R1, R2, eq	// identical pointers: equal without reading memory
+	MOVW	4(REGCTXT), R3	// compiler stores size at offset 4 in the closure
+	ADDU	R1, R3, R4	// R4 = end of a
+loop:
+	BNE	R1, R4, test
+	MOVW	$1, R1	// reached the end with no mismatch
+	MOVB	R1, ret+8(FP)
+	RET
+test:
+	MOVBU	(R1), R6
+	ADDU	$1, R1
+	MOVBU	(R2), R7
+	ADDU	$1, R2
+	BEQ	R6, R7, loop
+
+	MOVB	R0, ret+8(FP)	// mismatch: store 0 (false)
+	RET
+eq:
+	MOVW	$1, R1
+	MOVB	R1, ret+8(FP)
+	RET
diff --git a/src/internal/bytealg/equal_native.go b/src/internal/bytealg/equal_native.go
new file mode 100644
index 0000000..cf3a245
--- /dev/null
+++ b/src/internal/bytealg/equal_native.go
@@ -0,0 +1,21 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+import "unsafe"
+
+// The declarations below generate ABI wrappers for functions
+// implemented in assembly in this package but declared in another
+// package.
+
+// The compiler generates calls to runtime.memequal and runtime.memequal_varlen.
+// In addition, the runtime calls runtime.memequal explicitly.
+// Those functions are implemented in this package.
+
+//go:linkname abigen_runtime_memequal runtime.memequal
+func abigen_runtime_memequal(a, b unsafe.Pointer, size uintptr) bool // body-less: declared only so an ABI wrapper is generated
+
+//go:linkname abigen_runtime_memequal_varlen runtime.memequal_varlen
+func abigen_runtime_memequal_varlen(a, b unsafe.Pointer) bool // body-less: declared only so an ABI wrapper is generated
diff --git a/src/internal/bytealg/equal_ppc64x.s b/src/internal/bytealg/equal_ppc64x.s
new file mode 100644
index 0000000..5f0fea5
--- /dev/null
+++ b/src/internal/bytealg/equal_ppc64x.s
@@ -0,0 +1,103 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	MOVD    a+0(FP), R3
+	MOVD    b+8(FP), R4
+	MOVD    size+16(FP), R5
+	MOVD    $ret+24(FP), R10	// R10 = address of the result byte
+
+	BR	memeqbody<>(SB)	// memeqbody writes the result through R10 and returns
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
+	MOVD	a+0(FP), R3
+	MOVD	b+8(FP), R4
+	CMP	R3, R4
+	BEQ	eq	// identical pointers: equal without reading memory
+	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
+	MOVD    $ret+16(FP), R10	// R10 = address of the result byte
+	BR	memeqbody<>(SB)
+eq:
+	MOVD	$1, R3
+	MOVB	R3, ret+16(FP)
+	RET
+
+// Do an efficient memequal for ppc64
+// R3 = s1
+// R4 = s2
+// R5 = len
+// R10 = addr of return value (byte)
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD    R5,CTR		// byte count for the simple loop
+	CMP     R5,$8		// only optimize >=8
+	BLT     simplecheck
+	DCBT	(R3)		// cache hint
+	DCBT	(R4)
+	CMP	R5,$32		// optimize >= 32
+	MOVD	R5,R6		// needed if setup8a branch
+	BLT	setup8a		// 8 byte moves only
+setup32a:                       // 8 byte aligned, >= 32 bytes
+	SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
+	MOVD	R6,CTR
+	MOVD	$16,R14		// index for VSX loads and stores
+loop32a:
+	LXVD2X  (R3+R0), VS32	// VS32 = V0
+	LXVD2X  (R4+R0), VS33	// VS33 = V1
+	VCMPEQUBCC V0, V1, V2	// compare, setting CR6
+	BGE     CR6, noteq
+	LXVD2X  (R3+R14), VS32	// second 16 bytes of the 32-byte chunk
+	LXVD2X  (R4+R14), VS33
+	VCMPEQUBCC V0, V1, V2
+	BGE     CR6, noteq
+	ADD     $32,R3		// bump up to next 32
+	ADD     $32,R4
+	BC      16, 0, loop32a  // br ctr and cr
+	ANDCC	$24,R5,R6       // Any 8 byte chunks?
+	BEQ	leftover	// and result is 0
+setup8a:
+	SRADCC  $3,R6,R6        // get the 8 byte count
+	BEQ	leftover	// shifted value is 0
+	MOVD    R6,CTR
+loop8:
+	MOVD    0(R3),R6        // doublewords to compare
+	ADD	$8,R3
+	MOVD    0(R4),R7
+	ADD     $8,R4
+	CMP     R6,R7           // match?
+	BC	8,2,loop8	// bt ctr <> 0 && cr
+	BNE     noteq
+leftover:
+	ANDCC   $7,R5,R6        // check for leftover bytes
+	BEQ     equal
+	MOVD    R6,CTR
+	BR	simple
+simplecheck:
+	CMP	R5,$0
+	BEQ	equal		// zero length: equal
+simple:
+	MOVBZ   0(R3), R6	// compare one byte from each side per iteration
+	ADD	$1,R3
+	MOVBZ   0(R4), R7
+	ADD     $1,R4
+	CMP     R6, R7
+	BNE     noteq
+	BC      8,2,simple
+	BNE	noteq
+	BR	equal
+noteq:
+	MOVB    $0, (R10)
+	RET
+equal:
+	MOVD	$1, R3
+	MOVB	R3, (R10)
+	RET
+
diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s
new file mode 100644
index 0000000..22cb4fa
--- /dev/null
+++ b/src/internal/bytealg/equal_riscv64.s
@@ -0,0 +1,49 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+#define	CTXT	S4
+
+// func memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	MOV	a+0(FP), A1
+	MOV	b+8(FP), A2
+	BEQ	A1, A2, eq	// identical pointers: equal without reading memory
+	MOV	size+16(FP), A3
+	ADD	A1, A3, A4	// A4 = end of a
+loop:
+	BEQ	A1, A4, eq	// reached the end with no mismatch
+
+	MOVBU	(A1), A6
+	ADD	$1, A1
+	MOVBU	(A2), A7
+	ADD	$1, A2
+	BEQ	A6, A7, loop
+
+	MOVB	ZERO, ret+24(FP)	// mismatch: store false
+	RET
+eq:
+	MOV	$1, A1
+	MOVB	A1, ret+24(FP)
+	RET
+
+// func memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+	MOV	a+0(FP), A1
+	MOV	b+8(FP), A2
+	BEQ	A1, A2, eq	// identical pointers: equal without reading memory
+	MOV	8(CTXT), A3    // compiler stores size at offset 8 in the closure
+	MOV	A1, 8(X2)	// outgoing arguments a, b, size for runtime·memequal
+	MOV	A2, 16(X2)
+	MOV	A3, 24(X2)
+	CALL	runtime·memequal(SB)
+	MOVBU	32(X2), A1	// result byte written by the callee
+	MOVB	A1, ret+16(FP)
+	RET
+eq:
+	MOV	$1, A1
+	MOVB	A1, ret+16(FP)
+	RET
diff --git a/src/internal/bytealg/equal_s390x.s b/src/internal/bytealg/equal_s390x.s
new file mode 100644
index 0000000..67f814d
--- /dev/null
+++ b/src/internal/bytealg/equal_s390x.s
@@ -0,0 +1,92 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool -- loads the arguments into memeqbody's input registers and branches to it
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+	MOVD	a+0(FP), R3
+	MOVD	b+8(FP), R5
+	MOVD	size+16(FP), R6
+	LA	ret+24(FP), R7	// R7 = address of the result byte, written by memeqbody
+	BR	memeqbody<>(SB)
+
+// memequal_varlen(a, b unsafe.Pointer) bool -- like memequal, but the length is read from the closure pointed to by R12
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
+	MOVD	a+0(FP), R3
+	MOVD	b+8(FP), R5
+	MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
+	LA	ret+16(FP), R7	// R7 = address of the result byte, written by memeqbody
+	BR	memeqbody<>(SB)
+
+// input:
+//   R3 = a
+//   R5 = b
+//   R6 = len
+//   R7 = address of output byte (stores 0 or 1 here)
+//   a and b have the same length; R2, R8 and R9 are used as scratch
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	CMPBEQ	R3, R5, equal	// same pointer: equal without reading memory
+loop:
+	CMPBEQ	R6, $0, equal	// nothing left to compare
+	CMPBLT	R6, $32, tiny	// fewer than 32 bytes: use the load/compare ladder below
+	CMP	R6, $256
+	BLT	tail		// 32..255 bytes: a single variable-length CLC
+	CLC	$256, 0(R3), 0(R5)	// compare a full 256-byte chunk
+	BNE	notequal
+	SUB	$256, R6
+	LA	256(R3), R3
+	LA	256(R5), R5
+	BR	loop
+tail:
+	SUB	$1, R6, R8	// R8 = len-1, passed to EXRL below
+	EXRL	$memeqbodyclc<>(SB), R8	// execute the CLC at memeqbodyclc, modified by R8 (presumably its length field)
+	BEQ	equal
+notequal:
+	MOVB	$0, 0(R7)
+	RET
+equal:
+	MOVB	$1, 0(R7)
+	RET
+tiny:
+	MOVD	$0, R2		// R2 = offset of the bytes still to compare
+	CMPBLT	R6, $16, lt16
+	MOVD	0(R3), R8	// at least 16 bytes: compare two doublewords
+	MOVD	0(R5), R9
+	CMPBNE	R8, R9, notequal
+	MOVD	8(R3), R8
+	MOVD	8(R5), R9
+	CMPBNE	R8, R9, notequal
+	LA	16(R2), R2
+	SUB	$16, R6
+lt16:
+	CMPBLT	R6, $8, lt8
+	MOVD	0(R3)(R2*1), R8	// at least 8 bytes left: compare one doubleword
+	MOVD	0(R5)(R2*1), R9
+	CMPBNE	R8, R9, notequal
+	LA	8(R2), R2
+	SUB	$8, R6
+lt8:
+	CMPBLT	R6, $4, lt4
+	MOVWZ	0(R3)(R2*1), R8	// at least 4 bytes left: compare one word
+	MOVWZ	0(R5)(R2*1), R9
+	CMPBNE	R8, R9, notequal
+	LA	4(R2), R2
+	SUB	$4, R6
+lt4:
+#define CHECK(n) \
+	CMPBEQ	R6, $n, equal \
+	MOVB	n(R3)(R2*1), R8 \
+	MOVB	n(R5)(R2*1), R9 \
+	CMPBNE	R8, R9, notequal
+	CHECK(0)
+	CHECK(1)
+	CHECK(2)
+	CHECK(3)
+	BR	equal
+
+TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 // holds the CLC template that memeqbody's tail path runs through EXRL
+	CLC	$1, 0(R3), 0(R5)	// compares memory at R3 and R5; memeqbody passes len-1 in the EXRL register
+	RET
diff --git a/src/internal/bytealg/equal_wasm.s b/src/internal/bytealg/equal_wasm.s
new file mode 100644
index 0000000..a2b76c1
--- /dev/null
+++ b/src/internal/bytealg/equal_wasm.s
@@ -0,0 +1,77 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(p, q unsafe.Pointer, size uintptr) bool -- pushes the arguments and stores memeqbody's 0/1 result in ret
+TEXT runtime·memequal(SB), NOSPLIT, $0-25
+	Get SP // pushed first; presumably the address operand consumed by the final I64Store8
+	I64Load a+0(FP)
+	I64Load b+8(FP)
+	I64Load size+16(FP)
+	Call memeqbody<>(SB)
+	I64Store8 ret+24(FP)
+	RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool -- like memequal, but the length comes from the closure context (CTXT)
+TEXT runtime·memequal_varlen(SB), NOSPLIT, $0-17
+	Get SP // pushed first; presumably the address operand consumed by the final I64Store8
+	I64Load a+0(FP)
+	I64Load b+8(FP)
+	I64Load 8(CTXT) // compiler stores size at offset 8 in the closure
+	Call memeqbody<>(SB)
+	I64Store8 ret+16(FP)
+	RET
+
+// params: a, b, len (read below as R0, R1 and R2)
+// ret: 0/1 (1 when the pointers are identical or all len bytes match, 0 at the first differing byte)
+TEXT memeqbody<>(SB), NOSPLIT, $0-0
+	Get R0
+	Get R1
+	I64Eq
+	If // same pointer: equal without reading memory
+		I64Const $1
+		Return
+	End
+
+loop:
+	Loop
+		Get R2
+		I64Eqz
+		If // no bytes left: everything matched
+			I64Const $1
+			Return
+		End
+
+		Get R0
+		I32WrapI64
+		I64Load8U $0 // current byte of a
+		Get R1
+		I32WrapI64
+		I64Load8U $0 // current byte of b
+		I64Ne
+		If // bytes differ
+			I64Const $0
+			Return
+		End
+
+		Get R0
+		I64Const $1
+		I64Add
+		Set R0 // a++
+
+		Get R1
+		I64Const $1
+		I64Add
+		Set R1 // b++
+
+		Get R2
+		I64Const $1
+		I64Sub
+		Set R2 // len--
+
+		Br loop
+	End
+	UNDEF
diff --git a/src/internal/bytealg/index_amd64.go b/src/internal/bytealg/index_amd64.go
new file mode 100644
index 0000000..c7a1941
--- /dev/null
+++ b/src/internal/bytealg/index_amd64.go
@@ -0,0 +1,26 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+import "internal/cpu"
+
+const MaxBruteForce = 64 // amd64 value of the bytealg brute-force threshold; see the bytes.Index implementation for how it is used
+
+func init() { // picks MaxLen, the longest needle Index/IndexString accept on this CPU
+	if cpu.X86.HasAVX2 {
+		MaxLen = 63 // needles of 32..63 bytes are handled by the AVX2 paths in index_amd64.s
+	} else {
+		MaxLen = 31 // without AVX2 only the paths for needles up to 31 bytes may be used
+	}
+}
+
+// Cutover reports the number of failures of IndexByte we should tolerate
+// before switching over to Index.
+// n is the number of bytes processed so far.
+// See the bytes.Index implementation for details.
+func Cutover(n int) int {
+	// 1 error per 8 characters, plus a slop of 2 to start: Cutover(0) == 2, Cutover(64) == 10.
+	return (n + 16) / 8
+}
diff --git a/src/internal/bytealg/index_amd64.s b/src/internal/bytealg/index_amd64.s
new file mode 100644
index 0000000..6193b57
--- /dev/null
+++ b/src/internal/bytealg/index_amd64.s
@@ -0,0 +1,274 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Index(SB),NOSPLIT,$0-56 // Index(a, b []byte) int: loads the arguments into indexbody's registers and jumps to it
+	MOVQ a_base+0(FP), DI
+	MOVQ a_len+8(FP), DX
+	MOVQ b_base+24(FP), R8
+	MOVQ b_len+32(FP), AX
+	MOVQ DI, R10 // keep the start of a; indexbody subtracts it to turn a match address into an index
+	LEAQ ret+48(FP), R11 // R11 = address of the result slot written by indexbody
+	JMP  indexbody<>(SB)
+
+TEXT ·IndexString(SB),NOSPLIT,$0-40 // IndexString(a, b string) int: loads the arguments into indexbody's registers and jumps to it
+	MOVQ a_base+0(FP), DI
+	MOVQ a_len+8(FP), DX
+	MOVQ b_base+16(FP), R8
+	MOVQ b_len+24(FP), AX
+	MOVQ DI, R10 // keep the start of a; indexbody subtracts it to turn a match address into an index
+	LEAQ ret+32(FP), R11 // R11 = address of the result slot written by indexbody
+	JMP  indexbody<>(SB)
+
+// AX: length of string, that we are searching for
+// DX: length of string, in which we are searching
+// DI: pointer to string, in which we are searching
+// R8: pointer to string, that we are searching for
+// R11: address, where to put return value (match index, or -1); R10: start of the searched string
+// Note: We want len in DX and AX, because PCMPESTRI implicitly consumes them
+TEXT indexbody<>(SB),NOSPLIT,$0
+	CMPQ AX, DX
+	JA fail // needle longer than haystack
+	CMPQ DX, $16
+	JAE sse42 // haystack of 16+ bytes: consider the PCMPESTRI path
+no_sse42:
+	CMPQ AX, $2
+	JA   _3_or_more
+	MOVW (R8), R8 // 2-byte needle held in R8
+	LEAQ -1(DI)(DX*1), DX // DX = end bound for DI: one past the last valid start
+loop2:
+	MOVW (DI), SI
+	CMPW SI,R8
+	JZ success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop2
+	JMP fail
+_3_or_more:
+	CMPQ AX, $3
+	JA   _4_or_more
+	MOVW 1(R8), BX // needle bytes 1..2
+	MOVW (R8), R8 // needle bytes 0..1 (overlaps BX by one byte)
+	LEAQ -2(DI)(DX*1), DX // DX = one past the last valid start
+loop3:
+	MOVW (DI), SI
+	CMPW SI,R8
+	JZ   partial_success3
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop3
+	JMP fail
+partial_success3:
+	MOVW 1(DI), SI // first two bytes matched; check bytes 1..2
+	CMPW SI,BX
+	JZ success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop3
+	JMP fail
+_4_or_more:
+	CMPQ AX, $4
+	JA   _5_or_more
+	MOVL (R8), R8 // 4-byte needle held in R8
+	LEAQ -3(DI)(DX*1), DX // DX = one past the last valid start
+loop4:
+	MOVL (DI), SI
+	CMPL SI,R8
+	JZ   success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop4
+	JMP fail
+_5_or_more:
+	CMPQ AX, $7
+	JA   _8_or_more
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX // DX = DI + len(haystack) - len(needle) + 1, one past the last valid start
+	MOVL -4(R8)(AX*1), BX // last 4 bytes of the needle
+	MOVL (R8), R8 // first 4 bytes of the needle (the two words overlap)
+loop5to7:
+	MOVL (DI), SI
+	CMPL SI,R8
+	JZ   partial_success5to7
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop5to7
+	JMP fail
+partial_success5to7:
+	MOVL -4(AX)(DI*1), SI // first word matched; check the last 4 bytes of the candidate
+	CMPL SI,BX
+	JZ success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop5to7
+	JMP fail
+_8_or_more:
+	CMPQ AX, $8
+	JA   _9_or_more
+	MOVQ (R8), R8 // 8-byte needle held in R8
+	LEAQ -7(DI)(DX*1), DX // DX = one past the last valid start
+loop8:
+	MOVQ (DI), SI
+	CMPQ SI,R8
+	JZ   success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop8
+	JMP fail
+_9_or_more:
+	CMPQ AX, $15
+	JA   _16_or_more
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX // DX = one past the last valid start
+	MOVQ -8(R8)(AX*1), BX // last 8 bytes of the needle
+	MOVQ (R8), R8 // first 8 bytes of the needle (the two quadwords overlap)
+loop9to15:
+	MOVQ (DI), SI
+	CMPQ SI,R8
+	JZ   partial_success9to15
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop9to15
+	JMP fail
+partial_success9to15:
+	MOVQ -8(AX)(DI*1), SI // first quadword matched; check the last 8 bytes of the candidate
+	CMPQ SI,BX
+	JZ success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop9to15
+	JMP fail
+_16_or_more:
+	CMPQ AX, $16
+	JA   _17_or_more
+	MOVOU (R8), X1 // 16-byte needle held in X1
+	LEAQ -15(DI)(DX*1), DX // DX = one past the last valid start
+loop16:
+	MOVOU (DI), X2
+	PCMPEQB X1, X2
+	PMOVMSKB X2, SI
+	CMPQ  SI, $0xffff // all 16 byte lanes equal
+	JE   success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop16
+	JMP fail
+_17_or_more:
+	CMPQ AX, $31
+	JA   _32_or_more
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX // DX = one past the last valid start
+	MOVOU -16(R8)(AX*1), X0 // last 16 bytes of the needle
+	MOVOU (R8), X1 // first 16 bytes of the needle (the two vectors overlap)
+loop17to31:
+	MOVOU (DI), X2
+	PCMPEQB X1,X2
+	PMOVMSKB X2, SI
+	CMPQ  SI, $0xffff
+	JE   partial_success17to31
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop17to31
+	JMP fail
+partial_success17to31:
+	MOVOU -16(AX)(DI*1), X3 // first 16 bytes matched; check the last 16 bytes of the candidate
+	PCMPEQB X0, X3
+	PMOVMSKB X3, SI
+	CMPQ  SI, $0xffff
+	JE success
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop17to31
+	JMP fail
+// We can get here only when AVX2 is enabled and MaxLen is set to 63 (see init in index_amd64.go)
+// So no need to check cpuid
+_32_or_more:
+	CMPQ AX, $32
+	JA   _33_to_63
+	VMOVDQU (R8), Y1 // 32-byte needle held in Y1
+	LEAQ -31(DI)(DX*1), DX // DX = one past the last valid start
+loop32:
+	VMOVDQU (DI), Y2
+	VPCMPEQB Y1, Y2, Y3
+	VPMOVMSKB Y3, SI
+	CMPL  SI, $0xffffffff // all 32 byte lanes equal
+	JE   success_avx2
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop32
+	JMP fail_avx2
+_33_to_63:
+	LEAQ 1(DI)(DX*1), DX
+	SUBQ AX, DX // DX = one past the last valid start
+	VMOVDQU -32(R8)(AX*1), Y0 // last 32 bytes of the needle
+	VMOVDQU (R8), Y1 // first 32 bytes of the needle (the two vectors overlap)
+loop33to63:
+	VMOVDQU (DI), Y2
+	VPCMPEQB Y1, Y2, Y3
+	VPMOVMSKB Y3, SI
+	CMPL  SI, $0xffffffff
+	JE   partial_success33to63
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop33to63
+	JMP fail_avx2
+partial_success33to63:
+	VMOVDQU -32(AX)(DI*1), Y3 // first 32 bytes matched; check the last 32 bytes of the candidate
+	VPCMPEQB Y0, Y3, Y4
+	VPMOVMSKB Y4, SI
+	CMPL  SI, $0xffffffff
+	JE success_avx2
+	ADDQ $1,DI
+	CMPQ DI,DX
+	JB loop33to63
+fail_avx2:
+	VZEROUPPER
+fail:
+	MOVQ $-1, (R11) // not found
+	RET
+success_avx2:
+	VZEROUPPER
+	JMP success
+sse42:
+	CMPB internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1
+	JNE no_sse42 // CPU lacks SSE4.2: use the plain paths
+	CMPQ AX, $12
+	// PCMPESTRI is slower than normal compare,
+	// so using it makes sense only if we advance 4+ bytes per compare
+	// This value was determined experimentally and is the ~same
+	// on Nehalem (first with SSE42) and Haswell.
+	JAE _9_or_more
+	LEAQ 16(R8), SI
+	TESTW $0xff0, SI // NOTE(review): presumably keeps the 16-byte needle load below off a 4 KiB page boundary - confirm
+	JEQ no_sse42
+	MOVOU (R8), X1
+	LEAQ -15(DI)(DX*1), SI // SI = loop bound for DI: the address 15 bytes before the end of the haystack
+	MOVQ $16, R9
+	SUBQ AX, R9 // We advance by 16-len(sep) each iteration, so precalculate it into R9
+loop_sse42:
+	// 0x0c means: unsigned byte compare (bits 0,1 are 00)
+	// for equality (bits 2,3 are 11)
+	// result is not masked or inverted (bits 4,5 are 00)
+	// and corresponds to first matching byte (bit 6 is 0)
+	PCMPESTRI $0x0c, (DI), X1
+	// CX == 16 means no match,
+	// CX > R9 means partial match at the end of the string,
+	// otherwise sep is at offset CX from X1 start
+	CMPQ CX, R9
+	JBE sse42_success
+	ADDQ R9, DI
+	CMPQ DI, SI
+	JB loop_sse42
+	PCMPESTRI $0x0c, -1(SI), X1 // final probe over the last 16 bytes of the haystack
+	CMPQ CX, R9
+	JA fail
+	LEAQ -1(SI), DI
+sse42_success:
+	ADDQ CX, DI // DI = address of the match
+success:
+	SUBQ R10, DI // convert the match address into an index from the start of the haystack
+	MOVQ DI, (R11)
+	RET
diff --git a/src/internal/bytealg/index_arm64.go b/src/internal/bytealg/index_arm64.go
new file mode 100644
index 0000000..e87c109
--- /dev/null
+++ b/src/internal/bytealg/index_arm64.go
@@ -0,0 +1,23 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+// Empirical data shows that using Index can get better
+// performance when len(s) <= 16.
+const MaxBruteForce = 16
+
+func init() {
+	// Optimize cases where the length of the substring is at most 32 bytes (index_arm64.s handles 2 <= len <= 32)
+	MaxLen = 32
+}
+
+// Cutover reports the number of failures of IndexByte we should tolerate
+// before switching over to Index.
+// n is the number of bytes processed so far.
+// See the bytes.Index implementation for details.
+func Cutover(n int) int {
+	// 1 error per 16 characters, plus a slop of 4 to start; >> binds tighter than +, so this is 4 + (n>>4).
+	return 4 + n>>4
+}
diff --git a/src/internal/bytealg/index_arm64.s b/src/internal/bytealg/index_arm64.s
new file mode 100644
index 0000000..3a551a7
--- /dev/null
+++ b/src/internal/bytealg/index_arm64.s
@@ -0,0 +1,206 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Index(SB),NOSPLIT,$0-56 // Index(a, b []byte) int: loads the arguments into indexbody's registers and branches to it
+	MOVD	a_base+0(FP), R0
+	MOVD	a_len+8(FP), R1
+	MOVD	b_base+24(FP), R2
+	MOVD	b_len+32(FP), R3
+	MOVD	$ret+48(FP), R9	// R9 = address of the result slot written by indexbody
+	B	indexbody<>(SB)
+
+TEXT ·IndexString(SB),NOSPLIT,$0-40 // IndexString(a, b string) int: loads the arguments into indexbody's registers and branches to it
+	MOVD	a_base+0(FP), R0
+	MOVD	a_len+8(FP), R1
+	MOVD	b_base+16(FP), R2
+	MOVD	b_len+24(FP), R3
+	MOVD	$ret+32(FP), R9	// R9 = address of the result slot written by indexbody
+	B	indexbody<>(SB)
+
+// input:
+//   R0: haystack
+//   R1: length of haystack
+//   R2: needle
+//   R3: length of needle (2 <= len <= 32)
+//   R9: address to put result (index of the first match, or -1)
+TEXT indexbody<>(SB),NOSPLIT,$0-56
+	// main idea is to load 'sep' into separate register(s)
+	// to avoid repeatedly re-load it again and again
+	// for subsequent substring comparisons
+	SUB	R3, R1, R4
+	// R4 contains the start of last substring for comparison
+	ADD	R0, R4, R4
+	ADD	$1, R0, R8	// R8 = haystack+1; every loop post-increments R0 by 1, so R0-R8 is the match index
+
+	CMP	$8, R3
+	BHI	greater_8
+	TBZ	$3, R3, len_2_7	// bit 3 clear: length is 2..7
+len_8:
+	// R5 contains 8-byte of sep
+	MOVD	(R2), R5
+loop_8:
+	// R6 contains substring for comparison
+	CMP	R4, R0
+	BHI	not_found
+	MOVD.P	1(R0), R6
+	CMP	R5, R6
+	BNE	loop_8
+	B	found
+len_2_7:
+	TBZ	$2, R3, len_2_3	// bit 2 clear: length is 2 or 3
+	TBZ	$1, R3, len_4_5	// bit 1 clear: length is 4 or 5
+	TBZ	$0, R3, len_6	// bit 0 clear: length is 6, otherwise 7
+len_7:
+	// R5 and R6 contain 7-byte of sep
+	MOVWU	(R2), R5
+	// 1-byte overlap with R5
+	MOVWU	3(R2), R6
+loop_7:
+	CMP	R4, R0
+	BHI	not_found
+	MOVWU.P	1(R0), R3
+	CMP	R5, R3
+	BNE	loop_7
+	MOVWU	2(R0), R3	// R0 already advanced by 1, so this is candidate bytes 3..6
+	CMP	R6, R3
+	BNE	loop_7
+	B	found
+len_6:
+	// R5 and R6 contain 6-byte of sep
+	MOVWU	(R2), R5
+	MOVHU	4(R2), R6
+loop_6:
+	CMP	R4, R0
+	BHI	not_found
+	MOVWU.P	1(R0), R3
+	CMP	R5, R3
+	BNE	loop_6
+	MOVHU	3(R0), R3	// candidate bytes 4..5 (R0 already advanced by 1)
+	CMP	R6, R3
+	BNE	loop_6
+	B	found
+len_4_5:
+	TBZ	$0, R3, len_4
+len_5:
+	// R5 and R7 contain 5-byte of sep
+	MOVWU	(R2), R5
+	MOVBU	4(R2), R7
+loop_5:
+	CMP	R4, R0
+	BHI	not_found
+	MOVWU.P	1(R0), R3
+	CMP	R5, R3
+	BNE	loop_5
+	MOVBU	3(R0), R3	// candidate byte 4 (R0 already advanced by 1)
+	CMP	R7, R3
+	BNE	loop_5
+	B	found
+len_4:
+	// R5 contains 4-byte of sep
+	MOVWU	(R2), R5
+loop_4:
+	CMP	R4, R0
+	BHI	not_found
+	MOVWU.P	1(R0), R6
+	CMP	R5, R6
+	BNE	loop_4
+	B	found
+len_2_3:
+	TBZ	$0, R3, len_2
+len_3:
+	// R6 and R7 contain 3-byte of sep
+	MOVHU	(R2), R6
+	MOVBU	2(R2), R7
+loop_3:
+	CMP	R4, R0
+	BHI	not_found
+	MOVHU.P	1(R0), R3
+	CMP	R6, R3
+	BNE	loop_3
+	MOVBU	1(R0), R3	// candidate byte 2 (R0 already advanced by 1)
+	CMP	R7, R3
+	BNE	loop_3
+	B	found
+len_2:
+	// R5 contains 2-byte of sep
+	MOVHU	(R2), R5
+loop_2:
+	CMP	R4, R0
+	BHI	not_found
+	MOVHU.P	1(R0), R6
+	CMP	R5, R6
+	BNE	loop_2
+found:
+	SUB	R8, R0, R0	// index = R0 - (haystack+1)
+	MOVD	R0, (R9)
+	RET
+not_found:
+	MOVD	$-1, R0
+	MOVD	R0, (R9)
+	RET
+greater_8:
+	SUB	$9, R3, R11	// len(sep) - 9, offset of R0 for last 8 bytes
+	CMP	$16, R3
+	BHI	greater_16
+len_9_16:
+	MOVD.P	8(R2), R5	// R5 contains the first 8-byte of sep
+	SUB	$16, R3, R7	// len(sep) - 16, offset of R2 for last 8 bytes
+	MOVD	(R2)(R7), R6	// R6 contains the last 8-byte of sep
+loop_9_16:
+	// search the first 8 bytes first
+	CMP	R4, R0
+	BHI	not_found
+	MOVD.P	1(R0), R7
+	CMP	R5, R7
+	BNE	loop_9_16
+	MOVD	(R0)(R11), R7
+	CMP	R6, R7		// compare the last 8 bytes
+	BNE	loop_9_16
+	B	found
+greater_16:
+	CMP	$24, R3
+	BHI	len_25_32
+len_17_24:
+	LDP.P	16(R2), (R5, R6)	// R5 and R6 contain the first 16-byte of sep
+	SUB	$24, R3, R10		// len(sep) - 24
+	MOVD	(R2)(R10), R7		// R7 contains the last 8-byte of sep
+loop_17_24:
+	// search the first 16 bytes first
+	CMP	R4, R0
+	BHI	not_found
+	MOVD.P	1(R0), R10
+	CMP	R5, R10
+	BNE	loop_17_24
+	MOVD	7(R0), R10	// candidate bytes 8..15 (R0 already advanced by 1)
+	CMP	R6, R10
+	BNE	loop_17_24
+	MOVD	(R0)(R11), R10
+	CMP	R7, R10		// compare the last 8 bytes
+	BNE	loop_17_24
+	B	found
+len_25_32:
+	LDP.P	16(R2), (R5, R6)
+	MOVD.P	8(R2), R7	// R5, R6 and R7 contain the first 24-byte of sep
+	SUB	$32, R3, R12	// len(sep) - 32
+	MOVD	(R2)(R12), R10	// R10 contains the last 8-byte of sep
+loop_25_32:
+	// search the first 24 bytes first
+	CMP	R4, R0
+	BHI	not_found
+	MOVD.P	1(R0), R12
+	CMP	R5, R12
+	BNE	loop_25_32
+	MOVD	7(R0), R12	// candidate bytes 8..15 (R0 already advanced by 1)
+	CMP	R6, R12
+	BNE	loop_25_32
+	MOVD	15(R0), R12	// candidate bytes 16..23
+	CMP	R7, R12
+	BNE	loop_25_32
+	MOVD	(R0)(R11), R12
+	CMP	R10, R12	// compare the last 8 bytes
+	BNE	loop_25_32
+	B	found
diff --git a/src/internal/bytealg/index_generic.go b/src/internal/bytealg/index_generic.go
new file mode 100644
index 0000000..0a6eb90
--- /dev/null
+++ b/src/internal/bytealg/index_generic.go
@@ -0,0 +1,30 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 && !arm64 && !s390x && !ppc64le && !ppc64
+// +build !amd64,!arm64,!s390x,!ppc64le,!ppc64
+
+package bytealg
+
+const MaxBruteForce = 0 // value used on architectures without a native Index (the functions in this file panic)
+
+// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
+// Requires 2 <= len(b) <= MaxLen. This generic build has no implementation: it always panics.
+func Index(a, b []byte) int {
+	panic("unimplemented")
+}
+
+// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
+// Requires 2 <= len(b) <= MaxLen. This generic build has no implementation: it always panics.
+func IndexString(a, b string) int {
+	panic("unimplemented")
+}
+
+// Cutover reports the number of failures of IndexByte we should tolerate
+// before switching over to Index.
+// n is the number of bytes processed so far.
+// See the bytes.Index implementation for details. This generic build always panics.
+func Cutover(n int) int {
+	panic("unimplemented")
+}
diff --git a/src/internal/bytealg/index_native.go b/src/internal/bytealg/index_native.go
new file mode 100644
index 0000000..9547a5d
--- /dev/null
+++ b/src/internal/bytealg/index_native.go
@@ -0,0 +1,20 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || arm64 || s390x || ppc64le || ppc64
+// +build amd64 arm64 s390x ppc64le ppc64
+
+package bytealg
+
+//go:noescape
+
+// Index returns the index of the first instance of b in a, or -1 if b is not present in a.
+// Requires 2 <= len(b) <= MaxLen. The body is provided by the architecture's assembly file.
+func Index(a, b []byte) int
+
+//go:noescape
+
+// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
+// Requires 2 <= len(b) <= MaxLen. The body is provided by the architecture's assembly file.
+func IndexString(a, b string) int
diff --git a/src/internal/bytealg/index_ppc64x.go b/src/internal/bytealg/index_ppc64x.go
new file mode 100644
index 0000000..c9b2b5a
--- /dev/null
+++ b/src/internal/bytealg/index_ppc64x.go
@@ -0,0 +1,28 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (aix || linux) && (ppc64 || ppc64le)
+// +build aix linux
+// +build ppc64 ppc64le
+
+package bytealg
+
+import "internal/cpu"
+
+const MaxBruteForce = 16 // ppc64/ppc64le value of the bytealg brute-force threshold
+
+var SupportsPower9 = cpu.PPC64.IsPOWER9 // copy of the POWER9 flag reported by internal/cpu
+
+func init() {
+	MaxLen = 32 // longest separator Index/IndexString accept here (index_ppc64x.s handles 2 <= len <= 32)
+}
+
+// Cutover reports the number of failures of IndexByte we should tolerate
+// before switching over to Index.
+// n is the number of bytes processed so far.
+// See the bytes.Index implementation for details.
+func Cutover(n int) int {
+	// 1 error per 8 characters, plus a slop of 2 to start: Cutover(0) == 2, Cutover(64) == 10.
+	return (n + 16) / 8
+}
diff --git a/src/internal/bytealg/index_ppc64x.s b/src/internal/bytealg/index_ppc64x.s
new file mode 100644
index 0000000..3ed9442
--- /dev/null
+++ b/src/internal/bytealg/index_ppc64x.s
@@ -0,0 +1,768 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is an implementation based on the s390x
+// implementation.
+
+// Find a separator with 2 <= len <= 32 within a string.
+// Separators with lengths of 2, 3 or 4 are handled
+// specially.
+
+// This works on power8 and above. The loads and
+// compares are done in big endian order
+// since that allows the use of VCLZD, and allows
+// the same implementation to work on big and little
+// endian platforms with minimal conditional changes.
+
+// NOTE: There is a power9 implementation that
+// improves performance by 10-15% on little
+// endian for some of the benchmarks, but
+// work is still needed for a big endian
+// implementation on power9.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// Needed to swap LXVD2X loads to the correct
+// byte order to work on POWER8.
+
+#ifdef GOARCH_ppc64
+DATA byteswap<>+0(SB)/8, $0x0001020304050607
+DATA byteswap<>+8(SB)/8, $0x08090a0b0c0d0e0f
+#else
+DATA byteswap<>+0(SB)/8, $0x0706050403020100
+DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
+#endif
+
+// VLOADSWAP loads 16 bytes at (base)(index) into vsreg with LXVD2X, then permutes vreg with SWAP
+// so the bytes are in big endian order (callers pass a Vn / Vn_ pair). Address alignment does not need checking.
+#define VLOADSWAP(base, index, vreg, vsreg) \
+	LXVD2X (base)(index), vsreg;  \
+	VPERM  vreg, vreg, SWAP, vreg
+
+GLOBL byteswap<>+0(SB), RODATA, $16
+
+TEXT ·Index(SB), NOSPLIT|NOFRAME, $0-56 // Index(a, b []byte) int: loads the arguments and branches to the search body
+	MOVD a_base+0(FP), R3  // R3 = byte array pointer
+	MOVD a_len+8(FP), R4   // R4 = length
+	MOVD b_base+24(FP), R5 // R5 = separator pointer
+	MOVD b_len+32(FP), R6  // R6 = separator length
+	MOVD $ret+48(FP), R14  // R14 = &ret
+
+#ifdef GOARCH_ppc64le
+	MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7 // R7 = POWER9 flag from internal/cpu
+	CMP   R7, $1
+	BNE   power8               // flag not set: use the POWER8 body
+	BR    indexbodyp9<>(SB)    // POWER9 on little endian: use the POWER9 body
+
+#endif
+power8:
+	BR indexbody<>(SB)
+
+TEXT ·IndexString(SB), NOSPLIT|NOFRAME, $0-40 // IndexString(a, b string) int: loads the arguments and branches to the search body
+	MOVD a_base+0(FP), R3  // R3 = string
+	MOVD a_len+8(FP), R4   // R4 = length
+	MOVD b_base+16(FP), R5 // R5 = separator pointer
+	MOVD b_len+24(FP), R6  // R6 = separator length
+	MOVD $ret+32(FP), R14  // R14 = &ret
+
+#ifdef GOARCH_ppc64le
+	MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7 // R7 = POWER9 flag from internal/cpu
+	CMP   R7, $1
+	BNE   power8               // flag not set: use the POWER8 body
+	BR    indexbodyp9<>(SB)    // POWER9 on little endian: use the POWER9 body, exactly as ·Index does
+
+#endif
+power8:
+	BR indexbody<>(SB)
+
+	// s: string we are searching
+	// sep: string to search for
+	// R3=&s[0], R4=len(s)
+	// R5=&sep[0], R6=len(sep)
+	// R14=&ret (index where sep found)
+	// R7=working addr of string
+	// R16=index value 16
+	// R17=index value 17
+	// R18=index value 18
+	// R19=index value 1
+	// R26=LASTBYTE of string
+	// R27=LASTSTR last start byte to compare with sep
+	// R8, R9 scratch
+	// V0=sep left justified zero fill
+	// CR4=sep length >= 16
+
+#define SEPMASK V17
+#define LASTBYTE R26
+#define LASTSTR R27
+#define ONES V20
+#define SWAP V21
+#define V0_ VS32
+#define V1_ VS33
+#define V2_ VS34
+#define V3_ VS35
+#define V4_ VS36
+#define V5_ VS37
+#define V6_ VS38
+#define V7_ VS39
+#define V8_ VS40
+#define V9_ VS41
+#define SWAP_ VS53
+TEXT indexbody<>(SB), NOSPLIT|NOFRAME, $0
+	CMP      R6, R4                 // Compare lengths
+	BGT      notfound               // If sep len is > string, notfound
+	ADD      R4, R3, LASTBYTE       // find last byte addr
+	SUB      R6, LASTBYTE, LASTSTR  // LAST=&s[len(s)-len(sep)] (last valid start index)
+	CMP      R6, $0                 // Check sep len
+	BEQ      notfound               // sep len 0 -- not found
+	MOVD     R3, R7                 // Copy of string addr
+	MOVD     $16, R16               // Index value 16
+	MOVD     $17, R17               // Index value 17
+	MOVD     $18, R18               // Index value 18
+	MOVD     $1, R19                // Index value 1
+	MOVD     $byteswap<>+00(SB), R8
+	VSPLTISB $0xFF, ONES            // splat all 1s
+	LXVD2X   (R8)(R0), SWAP_        // Set up swap string
+
+	CMP    R6, $16, CR4        // CR4 for len(sep) >= 16
+	VOR    ONES, ONES, SEPMASK // Set up full SEPMASK
+	BGE    CR4, loadge16       // Load for len(sep) >= 16
+	SUB    R6, R16, R9         // 16-len of sep
+	SLD    $3, R9              // Set up for VSLO
+	MTVSRD R9, V9_             // Set up for VSLO
+	VSLDOI $8, V9, V9, V9      // Set up for VSLO
+	VSLO   ONES, V9, SEPMASK   // Mask for separator len(sep) < 16
+
+loadge16:
+	ANDCC $15, R5, R9 // Find byte offset of sep
+	ADD   R9, R6, R10 // Add sep len
+	CMP   R10, $16    // Check if sep len+offset > 16
+	BGE   sepcross16  // Sep crosses 16 byte boundary
+
+	RLDICR $0, R5, $59, R8 // Adjust addr to 16 byte container
+	VLOADSWAP(R8, R0, V0, V0_)// Load 16 bytes @R8 into V0
+	SLD    $3, R9          // Set up shift count for VSLO
+	MTVSRD R9, V8_         // Set up shift count for VSLO
+	VSLDOI $8, V8, V8, V8
+	VSLO   V0, V8, V0      // Shift by start byte
+
+	VAND V0, SEPMASK, V0 // Mask separator (< 16)
+	BR   index2plus
+
+sepcross16:
+	VLOADSWAP(R5, R0, V0, V0_) // Load 16 bytes @R5 into V0
+
+	VAND V0, SEPMASK, V0 // mask out separator
+	BLE  CR4, index2to16
+	BR   index17plus     // Handle sep > 16
+
+index2plus:
+	CMP      R6, $2       // Check length of sep
+	BNE      index3plus   // If not 2, check for 3
+	ADD      $16, R7, R9  // Check if next 16 bytes past last
+	CMP      R9, LASTBYTE // compare with last
+	BGE      index2to16   // 2 <= len(string) <= 16
+	MOVD     $0xff00, R21 // Mask for later
+	MTVSRD   R21, V25     // Move to Vreg
+	VSPLTH   $3, V25, V31 // Splat mask
+	VSPLTH   $0, V0, V1   // Splat 1st 2 bytes of sep
+	VSPLTISB $0, V10      // Clear V10
+
+	// First case: 2 byte separator
+	// V1: 2 byte separator splatted
+	// V2: 16 bytes at addr
+	// V4: 16 bytes at addr+1
+	// Compare 2 byte separator at start
+	// and at start+1. Use VSEL to combine
+	// those results to find the first
+	// matching start byte, returning
+	// that value when found. Loop as
+	// long as len(string) > 16
+index2loop2:
+	VLOADSWAP(R7, R19, V3, V3_) // Load 16 bytes @R7+1 into V3
+
+index2loop:
+	VLOADSWAP(R7, R0, V2, V2_) // Load 16 bytes @R7 into V2
+	VCMPEQUH V1, V2, V5        // Search for sep
+	VCMPEQUH V1, V3, V6        // Search for sep offset by 1
+	VSEL     V6, V5, V31, V7   // merge even and odd indices
+	VCLZD    V7, V18           // find index of first match
+	MFVSRD   V18, R25          // get first value
+	CMP      R25, $64          // Found if < 64
+	BLT      foundR25          // Return byte index where found
+	VSLDOI   $8, V18, V18, V18 // Adjust 2nd value
+	MFVSRD   V18, R25          // get second value
+	CMP      R25, $64          // Found if < 64
+	ADD      $64, R25          // Update byte offset
+	BLT      foundR25          // Return value
+	ADD      $16, R7           // R7+=16 Update string pointer
+	ADD      $17, R7, R9       // R9=F7+17 since loop unrolled
+	CMP      R9, LASTBYTE      // Compare addr+17 against last byte
+	BLT      index2loop2       // If < last, continue loop
+	CMP      R7, LASTBYTE      // Compare addr+16 against last byte
+	BLT      index2to16        // If < 16 handle specially
+	VLOADSWAP(R7, R0, V3, V3_) // Load 16 bytes @R7 into V3
+	VSLDOI   $1, V3, V10, V3   // Shift left by 1 byte
+	BR       index2loop
+
+index3plus:
+	CMP    R6, $3       // Check if sep == 3
+	BNE    index4plus   // If not check larger
+	ADD    $19, R7, R9  // Find bytes for use in this loop
+	CMP    R9, LASTBYTE // Compare against last byte
+	BGE    index2to16   // Remaining string 2<=len<=16
+	MOVD   $0xff00, R21 // Set up mask for upcoming loop
+	MTVSRD R21, V25     // Move mask to Vreg
+	VSPLTH $3, V25, V31 // Splat mask
+	VSPLTH $0, V0, V1   // Splat 1st two bytes of sep
+	VSPLTB $2, V0, V8   // Splat 3rd byte of sep
+
+	// Loop to process 3 byte separator.
+	// string[0:16] is in V2
+	// string[2:18] is in V3
+	// sep[0:2] splatted in V1
+	// sec[3] splatted in v8
+	// Load vectors at string, string+1
+	// and string+2. Compare string, string+1
+	// against first 2 bytes of separator
+	// splatted, and string+2 against 3rd
+	// byte splatted. Merge the results with
+	// VSEL to find the first byte of a match.
+
+	// Special handling for last 16 bytes if the
+	// string fits in 16 byte multiple.
+index3loop2:
+	MOVD     $2, R21          // Set up index for 2
+	VSPLTISB $0, V10          // Clear V10
+	VLOADSWAP(R7, R21, V3, V3_)// Load 16 bytes @R7+2 into V3
+	VSLDOI   $14, V3, V10, V3 // Left justify next 2 bytes
+
+index3loop:
+	VLOADSWAP(R7, R0, V2, V2_) // Load with correct order
+	VSLDOI   $1, V2, V3, V4    // string[1:17]
+	VSLDOI   $2, V2, V3, V9    // string[2:18]
+	VCMPEQUH V1, V2, V5        // compare hw even indices
+	VCMPEQUH V1, V4, V6        // compare hw odd indices
+	VCMPEQUB V8, V9, V10       // compare 3rd to last byte
+	VSEL     V6, V5, V31, V7   // Find 1st matching byte using mask
+	VAND     V7, V10, V7       // AND matched bytes with matched 3rd byte
+	VCLZD    V7, V18           // Find first nonzero indexes
+	MFVSRD   V18, R25          // Move 1st doubleword
+	CMP      R25, $64          // If < 64 found
+	BLT      foundR25          // Return matching index
+	VSLDOI   $8, V18, V18, V18 // Move value
+	MFVSRD   V18, R25          // Move 2nd doubleword
+	CMP      R25, $64          // If < 64 found
+	ADD      $64, R25          // Update byte index
+	BLT      foundR25          // Return matching index
+	ADD      $16, R7           // R7+=16 string ptr
+	ADD      $19, R7, R9       // Number of string bytes for loop
+	CMP      R9, LASTBYTE      // Compare against last byte of string
+	BLT      index3loop2       // If within, continue this loop
+	CMP      R7, LASTSTR       // Compare against last start byte
+	BLT      index2to16        // Process remainder
+	VSPLTISB $0, V3            // Special case for last 16 bytes
+	BR       index3loop        // Continue this loop
+
+	// Loop to process 4 byte separator
+	// string[0:16] in V2
+	// string[3:16] in V3
+	// sep[0:4] splatted in V1
+	// Set up vectors with strings at offsets
+	// 0, 1, 2, 3 and compare against the 4 byte
+	// separator also splatted. Use VSEL with the
+	// compare results to find the first byte where
+	// a separator match is found.
+index4plus:
+	CMP  R6, $4       // Check if 4 byte separator
+	BNE  index5plus   // If not next higher
+	ADD  $20, R7, R9  // Check string size to load
+	CMP  R9, LASTBYTE // Verify string length
+	BGE  index2to16   // If not large enough, process remaining
+	MOVD $2, R15      // Set up index
+
+	// Set up masks for use with VSEL
+	MOVD   $0xff, R21        // Set up mask 0xff000000ff000000...
+	SLD    $24, R21
+	MTVSRD R21, V10
+	VSPLTW $1, V10, V29
+	VSLDOI $2, V29, V29, V30 // Mask 0x0000ff000000ff00...
+	MOVD   $0xffff, R21
+	SLD    $16, R21
+	MTVSRD R21, V10
+	VSPLTW $1, V10, V31      // Mask 0xffff0000ffff0000...
+	VSPLTW $0, V0, V1        // Splat 1st word of separator
+
+index4loop:
+	VLOADSWAP(R7, R0, V2, V2_) // Load 16 bytes @R7 into V2
+
+next4:
+	VSPLTISB $0, V10            // Clear
+	MOVD     $3, R9             // Number of bytes beyond 16
+	VLOADSWAP(R7, R9, V3, V3_)  // Load 16 bytes @R7+3 into V3
+	VSLDOI   $13, V3, V10, V3   // Shift left last 3 bytes
+	VSLDOI   $1, V2, V3, V4     // V4=(V2:V3)<<1
+	VSLDOI   $2, V2, V3, V9     // V9=(V2:V3)<<2
+	VSLDOI   $3, V2, V3, V10    // V10=(V2:v3)<<3
+	VCMPEQUW V1, V2, V5         // compare index 0, 4, ... with sep
+	VCMPEQUW V1, V4, V6         // compare index 1, 5, ... with sep
+	VCMPEQUW V1, V9, V11        // compare index 2, 6, ... with sep
+	VCMPEQUW V1, V10, V12       // compare index 3, 7, ... with sep
+	VSEL     V6, V5, V29, V13   // merge index 0, 1, 4, 5, using mask
+	VSEL     V12, V11, V30, V14 // merge index 2, 3, 6, 7, using mask
+	VSEL     V14, V13, V31, V7  // final merge
+	VCLZD    V7, V18            // Find first index for each half
+	MFVSRD   V18, R25           // Isolate value
+	CMP      R25, $64           // If < 64, found
+	BLT      foundR25           // Return found index
+	VSLDOI   $8, V18, V18, V18  // Move for MFVSRD
+	MFVSRD   V18, R25           // Isolate other value
+	CMP      R25, $64           // If < 64, found
+	ADD      $64, R25           // Update index for high doubleword
+	BLT      foundR25           // Return found index
+	ADD      $16, R7            // R7+=16 for next string
+	ADD      $20, R7, R9        // R+20 for all bytes to load
+	CMP      R9, LASTBYTE       // Past end? Maybe check for extra?
+	BLT      index4loop         // If not, continue loop
+	CMP      R7, LASTSTR        // Check remainder
+	BLE      index2to16         // Process remainder
+	BR       notfound           // Not found
+
+index5plus:
+	CMP R6, $16     // Check for sep > 16
+	BGT index17plus // Handle large sep
+
+	// Assumption is that the separator is smaller than the string at this point
+index2to16:
+	CMP R7, LASTSTR // Compare last start byte
+	BGT notfound    // last takes len(sep) into account
+
+	ADD $16, R7, R9    // Check for last byte of string
+	CMP R9, LASTBYTE
+	BGT index2to16tail
+
+	// At least 16 bytes of string left
+	// Mask the number of bytes in sep
+index2to16loop:
+	VLOADSWAP(R7, R0, V1, V1_) // Load 16 bytes @R7 into V1
+
+compare:
+	VAND       V1, SEPMASK, V2 // Mask out sep size
+	VCMPEQUBCC V0, V2, V3      // Compare masked string
+	BLT        CR6, found      // All equal
+	ADD        $1, R7          // Update ptr to next byte
+	CMP        R7, LASTSTR     // Still less than last start byte
+	BGT        notfound        // Not found
+	ADD        $16, R7, R9     // Verify remaining bytes
+	CMP        R9, LASTBYTE    // At least 16
+	BLT        index2to16loop  // Try again
+
+	// Less than 16 bytes remaining in string
+	// Separator >= 2
+index2to16tail:
+	ADD   R3, R4, R9     // End of string
+	SUB   R7, R9, R9     // Number of bytes left
+	ANDCC $15, R7, R10   // 16 byte offset
+	ADD   R10, R9, R11   // offset + len
+	CMP   R11, $16       // >= 16?
+	BLE   short          // Does not cross 16 bytes
+	VLOADSWAP(R7, R0, V1, V1_)// Load 16 bytes @R7 into V1
+	BR    index2to16next // Continue on
+
+short:
+	RLDICR   $0, R7, $59, R9 // Adjust addr to 16 byte container
+	VLOADSWAP(R9, R0, V1, V1_)// Load 16 bytes @R9 into V1
+	SLD      $3, R10         // Set up shift
+	MTVSRD   R10, V8_        // Set up shift
+	VSLDOI   $8, V8, V8, V8
+	VSLO     V1, V8, V1      // Shift by start byte
+	VSPLTISB $0, V25         // Clear for later use
+
+index2to16next:
+	VAND       V1, SEPMASK, V2 // Just compare size of sep
+	VCMPEQUBCC V0, V2, V3      // Compare sep and partial string
+	BLT        CR6, found      // Found
+	ADD        $1, R7          // Not found, try next partial string
+	CMP        R7, LASTSTR     // Check for end of string
+	BGT        notfound        // If at end, then not found
+	VSLDOI     $1, V1, V25, V1 // Shift string left by 1 byte
+	BR         index2to16next  // Check the next partial string
+
+index17plus:
+	CMP      R6, $32      // Check if 17 < len(sep) <= 32
+	BGT      index33plus
+	SUB      $16, R6, R9  // Extra > 16
+	SLD      $56, R9, R10 // Shift to use in VSLO
+	MTVSRD   R10, V9_     // Set up for VSLO
+	VLOADSWAP(R5, R9, V1, V1_)// Load 16 bytes @R5+R9 into V1
+	VSLO     V1, V9, V1   // Shift left
+	VSPLTISB $0xff, V7    // Splat 1s
+	VSPLTISB $0, V27      // Splat 0
+
+index17to32loop:
+	VLOADSWAP(R7, R0, V2, V2_) // Load 16 bytes @R7 into V2
+
+next17:
+	VLOADSWAP(R7, R9, V3, V3_) // Load 16 bytes @R7+R9 into V3
+	VSLO       V3, V9, V3      // Shift left
+	VCMPEQUB   V0, V2, V4      // Compare first 16 bytes
+	VCMPEQUB   V1, V3, V5      // Compare extra over 16 bytes
+	VAND       V4, V5, V6      // Check if both equal
+	VCMPEQUBCC V6, V7, V8      // All equal?
+	BLT        CR6, found      // Yes
+	ADD        $1, R7          // On to next byte
+	CMP        R7, LASTSTR     // Check if last start byte
+	BGT        notfound        // If too high, not found
+	BR         index17to32loop // Continue
+
+notfound:
+	MOVD $-1, R8   // Return -1 if not found
+	MOVD R8, (R14)
+	RET
+
+index33plus:
+	MOVD $0, (R0) // Case not implemented
+	RET           // Crash before return
+
+foundR25:
+	SRD  $3, R25   // Convert from bits to bytes
+	ADD  R25, R7   // Add to current string address
+	SUB  R3, R7    // Subtract from start of string
+	MOVD R7, (R14) // Return byte where found
+	RET
+
+found:
+	SUB  R3, R7    // Return byte where found
+	MOVD R7, (R14)
+	RET
+
+TEXT indexbodyp9<>(SB), NOSPLIT|NOFRAME, $0 // R3=&s[0], R4=len(s), R5=&sep[0], R6=len(sep), R14=&ret
+	CMP      R6, R4                // Compare lengths
+	BGT      notfound              // If sep len is > string, notfound
+	ADD      R4, R3, LASTBYTE      // find last byte addr
+	SUB      R6, LASTBYTE, LASTSTR // LAST=&s[len(s)-len(sep)] (last valid start index)
+	CMP      R6, $0                // Check sep len
+	BEQ      notfound              // sep len 0 -- not found
+	MOVD     R3, R7                // Copy of string addr
+	MOVD     $16, R16              // Index value 16
+	MOVD     $17, R17              // Index value 17
+	MOVD     $18, R18              // Index value 18
+	MOVD     $1, R19               // Index value 1
+	VSPLTISB $0xFF, ONES           // splat all 1s
+
+	CMP    R6, $16, CR4        // CR4 for len(sep) >= 16
+	VOR    ONES, ONES, SEPMASK // Set up full SEPMASK
+	BGE    CR4, loadge16       // Load for len(sep) >= 16
+	SUB    R6, R16, R9         // 16-len of sep
+	SLD    $3, R9              // Set up for VSLO
+	MTVSRD R9, V9_             // Set up for VSLO
+	VSLDOI $8, V9, V9, V9      // Set up for VSLO
+	VSLO   ONES, V9, SEPMASK   // Mask for separator len(sep) < 16
+
+loadge16:
+	ANDCC $15, R5, R9 // Find byte offset of sep
+	ADD   R9, R6, R10 // Add sep len
+	CMP   R10, $16    // Check if sep len+offset >= 16
+	BGE   sepcross16  // Sep crosses 16 byte boundary
+
+	RLDICR  $0, R5, $59, R8 // Adjust addr to 16 byte container
+	LXVB16X (R8)(R0), V0_   // Load 16 bytes @R8 into V0
+	SLD     $3, R9          // Set up shift count for VSLO
+	MTVSRD  R9, V8_         // Set up shift count for VSLO
+	VSLDOI  $8, V8, V8, V8
+	VSLO    V0, V8, V0      // Shift by start byte
+
+	VAND V0, SEPMASK, V0 // Mask separator (< 16)
+	BR   index2plus
+
+sepcross16:
+	LXVB16X (R5)(R0), V0_ // Load 16 bytes @R5 into V0
+
+	VAND V0, SEPMASK, V0 // mask out separator
+	BLE  CR4, index2to16 // Handle sep <= 16 (CR4 set by CMP R6, $16 above)
+	BR   index17plus     // Handle sep > 16
+
+index2plus:
+	CMP      R6, $2       // Check length of sep
+	BNE      index3plus   // If not 2, check for 3
+	ADD      $16, R7, R9  // Check if next 16 bytes past last
+	CMP      R9, LASTBYTE // compare with last
+	BGE      index2to16   // 2 <= len(string) <= 16
+	MOVD     $0xff00, R21 // Mask for later
+	MTVSRD   R21, V25     // Move to Vreg
+	VSPLTH   $3, V25, V31 // Splat mask
+	VSPLTH   $0, V0, V1   // Splat 1st 2 bytes of sep
+	VSPLTISB $0, V10      // Clear V10
+
+	// First case: 2 byte separator
+	// V1: 2 byte separator splatted
+	// V2: 16 bytes at addr
+	// V4: 16 bytes at addr+1
+	// Compare 2 byte separator at start
+	// and at start+1. Use VSEL to combine
+	// those results to find the first
+	// matching start byte, returning
+	// that value when found. Loop as
+	// long as len(string) > 16
+index2loop2:
+	LXVB16X (R7)(R19), V3_ // Load 16 bytes @R7+1 into V3
+
+index2loop:
+	LXVB16X  (R7)(R0), V2_   // Load 16 bytes @R7 into V2
+	VCMPEQUH V1, V2, V5      // Search for sep
+	VCMPEQUH V1, V3, V6      // Search for sep offset by 1
+	VSEL     V6, V5, V31, V7 // merge even and odd indices
+	VCLZD    V7, V18         // find index of first match
+	MFVSRD   V18, R25        // get first value
+	CMP      R25, $64        // Found if < 64
+	BLT      foundR25        // Return byte index where found
+
+	MFVSRLD V18, R25        // get second value
+	CMP     R25, $64        // Found if < 64
+	ADD     $64, R25        // Update byte offset
+	BLT     foundR25        // Return value
+	ADD     $16, R7         // R7+=16 Update string pointer
+	ADD     $17, R7, R9     // R9=R7+17 since loop unrolled
+	CMP     R9, LASTBYTE    // Compare addr+17 against last byte
+	BLT     index2loop2     // If < last, continue loop
+	CMP     R7, LASTBYTE    // Compare addr+16 against last byte
+	BLT     index2to16      // If < 16 handle specially
+	LXVB16X (R7)(R0), V3_   // Load 16 bytes @R7 into V3
+	VSLDOI  $1, V3, V10, V3 // Shift left by 1 byte
+	BR      index2loop
+
+index3plus:
+	CMP    R6, $3       // Check if sep == 3
+	BNE    index4plus   // If not check larger
+	ADD    $19, R7, R9  // Find bytes for use in this loop
+	CMP    R9, LASTBYTE // Compare against last byte
+	BGE    index2to16   // Remaining string 2<=len<=16
+	MOVD   $0xff00, R21 // Set up mask for upcoming loop
+	MTVSRD R21, V25     // Move mask to Vreg
+	VSPLTH $3, V25, V31 // Splat mask
+	VSPLTH $0, V0, V1   // Splat 1st two bytes of sep
+	VSPLTB $2, V0, V8   // Splat 3rd byte of sep
+
+	// Loop to process 3 byte separator.
+	// string[0:16] is in V2
+	// string[16:18] is left-justified in V3
+	// sep[0:2] splatted in V1
+	// sep[2] splatted in V8
+	// Load vectors at string, string+1
+	// and string+2. Compare string, string+1
+	// against first 2 bytes of separator
+	// splatted, and string+2 against 3rd
+	// byte splatted. Merge the results with
+	// VSEL to find the first byte of a match.
+
+	// Special handling for last 16 bytes if the
+	// string fits in 16 byte multiple.
+index3loop2:
+	MOVD     $2, R21          // Set up index for 2
+	VSPLTISB $0, V10          // Clear V10
+	LXVB16X  (R7)(R21), V3_   // Load 16 bytes @R7+2 into V3
+	VSLDOI   $14, V3, V10, V3 // Left justify next 2 bytes
+
+index3loop:
+	LXVB16X  (R7)(R0), V2_   // Load 16 bytes @R7
+	VSLDOI   $1, V2, V3, V4  // string[1:17]
+	VSLDOI   $2, V2, V3, V9  // string[2:18]
+	VCMPEQUH V1, V2, V5      // compare hw even indices
+	VCMPEQUH V1, V4, V6      // compare hw odd indices
+	VCMPEQUB V8, V9, V10     // compare 3rd (last) byte of sep
+	VSEL     V6, V5, V31, V7 // Find 1st matching byte using mask
+	VAND     V7, V10, V7     // AND matched bytes with matched 3rd byte
+	VCLZD    V7, V18         // Find first nonzero indexes
+	MFVSRD   V18, R25        // Move 1st doubleword
+	CMP      R25, $64        // If < 64 found
+	BLT      foundR25        // Return matching index
+
+	MFVSRLD  V18, R25     // Move 2nd doubleword
+	CMP      R25, $64     // If < 64 found
+	ADD      $64, R25     // Update byte index
+	BLT      foundR25     // Return matching index
+	ADD      $16, R7      // R7+=16 string ptr
+	ADD      $19, R7, R9  // Number of string bytes for loop
+	CMP      R9, LASTBYTE // Compare against last byte of string
+	BLT      index3loop2  // If within, continue this loop
+	CMP      R7, LASTSTR  // Compare against last start byte
+	BLT      index2to16   // Process remainder
+	VSPLTISB $0, V3       // Special case for last 16 bytes
+	BR       index3loop   // Continue this loop
+
+	// Loop to process 4 byte separator
+	// string[0:16] in V2
+	// string[16:19] left-justified in V3
+	// sep[0:4] splatted in V1
+	// Set up vectors with strings at offsets
+	// 0, 1, 2, 3 and compare against the 4 byte
+	// separator also splatted. Use VSEL with the
+	// compare results to find the first byte where
+	// a separator match is found.
+index4plus:
+	CMP  R6, $4       // Check if 4 byte separator
+	BNE  index5plus   // If not next higher
+	ADD  $20, R7, R9  // Check string size to load
+	CMP  R9, LASTBYTE // Verify string length
+	BGE  index2to16   // If not large enough, process remaining
+	MOVD $2, R15      // Set up index
+
+	// Set up masks for use with VSEL
+	MOVD    $0xff, R21 // Set up mask 0xff000000ff000000...
+	SLD     $24, R21
+	MTVSRWS R21, V29
+
+	VSLDOI  $2, V29, V29, V30 // Mask 0x0000ff000000ff00...
+	MOVD    $0xffff, R21
+	SLD     $16, R21
+	MTVSRWS R21, V31
+
+	VSPLTW $0, V0, V1 // Splat 1st word of separator
+
+index4loop:
+	LXVB16X (R7)(R0), V2_ // Load 16 bytes @R7 into V2
+
+next4:
+	VSPLTISB $0, V10            // Clear
+	MOVD     $3, R9             // Number of bytes beyond 16
+	LXVB16X  (R7)(R9), V3_      // Load 16 bytes @R7+3 into V3
+	VSLDOI   $13, V3, V10, V3   // Shift left last 3 bytes
+	VSLDOI   $1, V2, V3, V4     // V4=(V2:V3)<<1
+	VSLDOI   $2, V2, V3, V9     // V9=(V2:V3)<<2
+	VSLDOI   $3, V2, V3, V10    // V10=(V2:v3)<<3
+	VCMPEQUW V1, V2, V5         // compare index 0, 4, ... with sep
+	VCMPEQUW V1, V4, V6         // compare index 1, 5, ... with sep
+	VCMPEQUW V1, V9, V11        // compare index 2, 6, ... with sep
+	VCMPEQUW V1, V10, V12       // compare index 3, 7, ... with sep
+	VSEL     V6, V5, V29, V13   // merge index 0, 1, 4, 5, using mask
+	VSEL     V12, V11, V30, V14 // merge index 2, 3, 6, 7, using mask
+	VSEL     V14, V13, V31, V7  // final merge
+	VCLZD    V7, V18            // Find first index for each half
+	MFVSRD   V18, R25           // Isolate value
+	CMP      R25, $64           // If < 64, found
+	BLT      foundR25           // Return found index
+
+	MFVSRLD V18, R25     // Isolate other value
+	CMP     R25, $64     // If < 64, found
+	ADD     $64, R25     // Update index for high doubleword
+	BLT     foundR25     // Return found index
+	ADD     $16, R7      // R7+=16 for next string
+	ADD     $20, R7, R9  // R7+20 for all bytes to load
+	CMP     R9, LASTBYTE // Past end? Maybe check for extra?
+	BLT     index4loop   // If not, continue loop
+	CMP     R7, LASTSTR  // Check remainder
+	BLE     index2to16   // Process remainder
+	BR      notfound     // Not found
+
+index5plus:
+	CMP R6, $16     // Check for sep > 16
+	BGT index17plus // Handle large sep
+
+	// Assumption is that the separator is smaller than the string at this point
+index2to16:
+	CMP R7, LASTSTR // Compare last start byte
+	BGT notfound    // last takes len(sep) into account
+
+	ADD $16, R7, R9    // Check for last byte of string
+	CMP R9, LASTBYTE
+	BGT index2to16tail
+
+	// At least 16 bytes of string left
+	// Mask the number of bytes in sep
+index2to16loop:
+	LXVB16X (R7)(R0), V1_ // Load 16 bytes @R7 into V1
+
+compare:
+	VAND       V1, SEPMASK, V2 // Mask out sep size
+	VCMPEQUBCC V0, V2, V3      // Compare masked string
+	BLT        CR6, found      // All equal
+	ADD        $1, R7          // Update ptr to next byte
+	CMP        R7, LASTSTR     // Still less than last start byte
+	BGT        notfound        // Not found
+	ADD        $16, R7, R9     // Verify remaining bytes
+	CMP        R9, LASTBYTE    // At least 16
+	BLT        index2to16loop  // Try again
+
+	// Less than 16 bytes remaining in string
+	// Separator >= 2
+index2to16tail:
+	ADD     R3, R4, R9     // End of string
+	SUB     R7, R9, R9     // Number of bytes left
+	ANDCC   $15, R7, R10   // 16 byte offset
+	ADD     R10, R9, R11   // offset + len
+	CMP     R11, $16       // > 16?
+	BLE     short          // Does not cross 16 bytes
+	LXVB16X (R7)(R0), V1_  // Load 16 bytes @R7 into V1
+	BR      index2to16next // Continue on
+
+short:
+	RLDICR   $0, R7, $59, R9 // Adjust addr to 16 byte container
+	LXVB16X  (R9)(R0), V1_   // Load 16 bytes @R9 into V1
+	SLD      $3, R10         // Set up shift
+	MTVSRD   R10, V8_        // Set up shift
+	VSLDOI   $8, V8, V8, V8
+	VSLO     V1, V8, V1      // Shift by start byte
+	VSPLTISB $0, V25         // Clear for later use
+
+index2to16next:
+	VAND       V1, SEPMASK, V2 // Just compare size of sep
+	VCMPEQUBCC V0, V2, V3      // Compare sep and partial string
+	BLT        CR6, found      // Found
+	ADD        $1, R7          // Not found, try next partial string
+	CMP        R7, LASTSTR     // Check for end of string
+	BGT        notfound        // If at end, then not found
+	VSLDOI     $1, V1, V25, V1 // Shift string left by 1 byte
+	BR         index2to16next  // Check the next partial string
+
+index17plus:
+	CMP      R6, $32       // Check if 17 <= len(sep) <= 32
+	BGT      index33plus
+	SUB      $16, R6, R9   // Extra > 16
+	SLD      $56, R9, R10  // Shift to use in VSLO
+	MTVSRD   R10, V9_      // Set up for VSLO
+	LXVB16X  (R5)(R9), V1_ // Load 16 bytes @R5+R9 into V1
+	VSLO     V1, V9, V1    // Shift left
+	VSPLTISB $0xff, V7     // Splat 1s
+	VSPLTISB $0, V27       // Splat 0
+
+index17to32loop:
+	LXVB16X (R7)(R0), V2_ // Load 16 bytes @R7 into V2
+
+next17:
+	LXVB16X    (R7)(R9), V3_   // Load 16 bytes @R7+R9 into V3
+	VSLO       V3, V9, V3      // Shift left
+	VCMPEQUB   V0, V2, V4      // Compare first 16 bytes
+	VCMPEQUB   V1, V3, V5      // Compare extra over 16 bytes
+	VAND       V4, V5, V6      // Check if both equal
+	VCMPEQUBCC V6, V7, V8      // All equal?
+	BLT        CR6, found      // Yes
+	ADD        $1, R7          // On to next byte
+	CMP        R7, LASTSTR     // Check if last start byte
+	BGT        notfound        // If too high, not found
+	BR         index17to32loop // Continue
+
+notfound:
+	MOVD $-1, R8   // Return -1 if not found
+	MOVD R8, (R14)
+	RET
+
+index33plus:
+	MOVD $0, (R0) // Case not implemented
+	RET           // Crash before return
+
+foundR25:
+	SRD  $3, R25   // Convert from bits to bytes
+	ADD  R25, R7   // Add to current string address
+	SUB  R3, R7    // Subtract from start of string
+	MOVD R7, (R14) // Return byte where found
+	RET
+
+found:
+	SUB  R3, R7    // Return byte where found
+	MOVD R7, (R14)
+	RET
+
diff --git a/src/internal/bytealg/index_s390x.go b/src/internal/bytealg/index_s390x.go
new file mode 100644
index 0000000..9340cf1
--- /dev/null
+++ b/src/internal/bytealg/index_s390x.go
@@ -0,0 +1,31 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+import "internal/cpu"
+
+const MaxBruteForce = 64
+
+func init() { // sets MaxLen when the vector (VX) facility is present
+	// Note: we're kind of lucky that this flag is available at this point.
+	// The runtime sets HasVX when processing auxv records, and that
+	// occurs *before* the init functions of packages that the runtime
+	// depends on are run.
+	// TODO: it would really be nicer for internal/cpu to figure out this
+	// flag by itself. Then we wouldn't need to depend on quirks of
+	// early startup initialization order.
+	if cpu.S390X.HasVX {
+		MaxLen = 64 // index_s390x.s handles separators of up to 64 bytes
+	}
+}
+
+// Cutover reports the number of failures of IndexByte we should tolerate
+// before switching over to Index.
+// n is the number of bytes processed so far.
+// See the bytes.Index implementation for details.
+func Cutover(n int) int {
+	// 1 error per 8 characters, plus a little slop to start (2 when n == 0).
+	return (n + 16) / 8
+}
diff --git a/src/internal/bytealg/index_s390x.s b/src/internal/bytealg/index_s390x.s
new file mode 100644
index 0000000..491d5bc
--- /dev/null
+++ b/src/internal/bytealg/index_s390x.s
@@ -0,0 +1,216 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// Caller must confirm availability of vx facility before calling.
+TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
+	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
+	LMG	b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
+	MOVD	$ret+48(FP), R5       // R5=&ret
+	BR	indexbody<>(SB)       // shared body stores the result through R5
+
+// Caller must confirm availability of vx facility before calling.
+TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
+	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
+	LMG	b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
+	MOVD	$ret+32(FP), R5       // R5=&ret
+	BR	indexbody<>(SB)       // shared body stores the result through R5
+
+// s: string we are searching
+// sep: string to search for
+// R1=&s[0], R2=len(s)
+// R3=&sep[0], R4=len(sep)
+// R5=&ret (int)
+// Caller must confirm availability of vx facility before calling.
+TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
+	CMPBGT	R4, R2, notfound // len(sep) > len(s)
+	ADD	R1, R2 // R2=&s[len(s)]
+	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
+	CMPBEQ	R4, $0, notfound // empty sep
+	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
+	VLL	R4, (R3), V0 // contains first 16 bytes of sep
+	MOVD	R1, R7 // R7=current search position in s
+index2plus:
+	CMPBNE	R4, $1, index3plus // len(sep) != 2
+	MOVD	$15(R7), R9
+	CMPBGE	R9, R2, index2to16
+	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
+	VONE	V16
+	VREPH	$0, V0, V1
+	CMPBGE	R9, R2, index2to16 // NOTE(review): repeats the check above; R9 and R2 are unchanged
+index2loop:
+	VL	0(R7), V2          // 16 bytes, even indices
+	VL	1(R7), V4          // 16 bytes, odd indices
+	VCEQH	V1, V2, V5         // compare even indices
+	VCEQH	V1, V4, V6         // compare odd indices
+	VSEL	V5, V6, V31, V7    // merge even and odd indices
+	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
+	BLT	foundV17
+	MOVD	$16(R7), R7        // R7+=16
+	ADD	$15, R7, R9
+	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
+	CMPBLE	R7, R2, index2to16
+	BR	notfound
+
+index3plus:
+	CMPBNE	R4, $2, index4plus // len(sep) != 3
+	ADD	$15, R7, R9
+	CMPBGE	R9, R2, index2to16
+	MOVD	$1, R0
+	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
+	VONE	V16
+	VREPH	$0, V0, V1
+	VREPB	$2, V0, V8
+index3loop:
+	VL	(R7), V2           // load 16-bytes into V2
+	VLL	R0, 16(R7), V3     // load 2-bytes into V3
+	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
+	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
+	VCEQH	V1, V2, V5         // compare 2-byte even indices
+	VCEQH	V1, V4, V6         // compare 2-byte odd indices
+	VCEQB	V8, V9, V10        // compare last bytes
+	VSEL	V5, V6, V31, V7    // merge even and odd indices
+	VN	V7, V10, V7        // AND indices with last byte
+	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
+	BLT	foundV17
+	MOVD	$16(R7), R7        // R7+=16
+	ADD	$15, R7, R9
+	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
+	CMPBLE	R7, R2, index2to16
+	BR	notfound
+
+index4plus:
+	CMPBNE	R4, $3, index5plus // len(sep) != 4
+	ADD	$15, R7, R9
+	CMPBGE	R9, R2, index2to16
+	MOVD	$2, R0
+	VGBM	$0x8888, V29       // 0xff000000ff000000...
+	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
+	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
+	VONE	V16
+	VREPF	$0, V0, V1
+index4loop:
+	VL	(R7), V2           // load 16-bytes into V2
+	VLL	R0, 16(R7), V3     // load 3-bytes into V3
+	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
+	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
+	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
+	VCEQF	V1, V2, V5         // compare index 0, 4, ...
+	VCEQF	V1, V4, V6         // compare index 1, 5, ...
+	VCEQF	V1, V9, V11        // compare index 2, 6, ...
+	VCEQF	V1, V10, V12       // compare index 3, 7, ...
+	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
+	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
+	VSEL	V13, V14, V31, V7  // final merge
+	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
+	BLT	foundV17
+	MOVD	$16(R7), R7        // R7+=16
+	ADD	$15, R7, R9
+	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
+	CMPBLE	R7, R2, index2to16
+	BR	notfound
+
+index5plus:
+	CMPBGT	R4, $15, index17plus // len(sep) > 16
+index2to16:
+	CMPBGT	R7, R2, notfound
+	MOVD	$1(R7), R8
+	CMPBGT	R8, R2, index2to16tail
+index2to16loop:
+	// unrolled 2x
+	VLL	R4, (R7), V1
+	VLL	R4, 1(R7), V2
+	VCEQGS	V0, V1, V3
+	BEQ	found
+	MOVD	$1(R7), R7
+	VCEQGS	V0, V2, V4
+	BEQ	found
+	MOVD	$1(R7), R7
+	CMPBLT	R7, R2, index2to16loop
+	CMPBGT	R7, R2, notfound
+index2to16tail:
+	VLL	R4, (R7), V1
+	VCEQGS	V0, V1, V2
+	BEQ	found
+	BR	notfound
+
+index17plus:
+	CMPBGT	R4, $31, index33plus // len(sep) > 32
+	SUB	$16, R4, R0
+	VLL	R0, 16(R3), V1
+	VONE	V7
+index17to32loop:
+	VL	(R7), V2
+	VLL	R0, 16(R7), V3
+	VCEQG	V0, V2, V4
+	VCEQG	V1, V3, V5
+	VN	V4, V5, V6
+	VCEQGS	V6, V7, V8
+	BEQ	found
+	MOVD	$1(R7), R7
+	CMPBLE  R7, R2, index17to32loop
+	BR	notfound
+
+index33plus:
+	CMPBGT	R4, $47, index49plus // len(sep) > 48
+	SUB	$32, R4, R0
+	VL	16(R3), V1
+	VLL	R0, 32(R3), V2
+	VONE	V11
+index33to48loop:
+	VL	(R7), V3
+	VL	16(R7), V4
+	VLL	R0, 32(R7), V5
+	VCEQG	V0, V3, V6
+	VCEQG	V1, V4, V7
+	VCEQG	V2, V5, V8
+	VN	V6, V7, V9
+	VN	V8, V9, V10
+	VCEQGS	V10, V11, V12
+	BEQ	found
+	MOVD	$1(R7), R7
+	CMPBLE  R7, R2, index33to48loop
+	BR	notfound
+
+index49plus:
+	CMPBGT	R4, $63, index65plus // len(sep) > 64
+	SUB	$48, R4, R0
+	VL	16(R3), V1
+	VL	32(R3), V2
+	VLL	R0, 48(R3), V3
+	VONE	V15
+index49to64loop:
+	VL	(R7), V4
+	VL	16(R7), V5
+	VL	32(R7), V6
+	VLL	R0, 48(R7), V7
+	VCEQG	V0, V4, V8
+	VCEQG	V1, V5, V9
+	VCEQG	V2, V6, V10
+	VCEQG	V3, V7, V11
+	VN	V8, V9, V12
+	VN	V10, V11, V13
+	VN	V12, V13, V14
+	VCEQGS	V14, V15, V16
+	BEQ	found
+	MOVD	$1(R7), R7
+	CMPBLE  R7, R2, index49to64loop
+notfound:
+	MOVD	$-1, (R5) // *ret = -1
+	RET
+
+index65plus:
+	// not implemented
+	MOVD	$0, (R0)
+	RET
+
+foundV17: // index is in doubleword V17[0]
+	VLGVG	$0, V17, R8
+	ADD	R8, R7
+found:
+	SUB	R1, R7 // R7=offset of the match from &s[0]
+	MOVD	R7, (R5)
+	RET
diff --git a/src/internal/bytealg/indexbyte_386.s b/src/internal/bytealg/indexbyte_386.s
new file mode 100644
index 0000000..8a03054
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_386.s
@@ -0,0 +1,34 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·IndexByte(SB),NOSPLIT,$0-20
+	MOVL	b_base+0(FP), SI
+	MOVL	b_len+4(FP), CX
+	MOVB	c+12(FP), AL
+	MOVL	SI, DI	// DI = scan pointer, SI keeps the base
+	CLD; REPN; SCASB	// scan forward for AL, at most CX bytes
+	JZ 3(PC)	// match: skip the not-found return below
+	MOVL	$-1, ret+16(FP)
+	RET
+	SUBL	SI, DI	// distance scanned from the base
+	SUBL	$1, DI	// DI stopped one past the matching byte
+	MOVL	DI, ret+16(FP)
+	RET
+
+TEXT ·IndexByteString(SB),NOSPLIT,$0-16
+	MOVL	s_base+0(FP), SI
+	MOVL	s_len+4(FP), CX
+	MOVB	c+8(FP), AL
+	MOVL	SI, DI	// DI = scan pointer, SI keeps the base
+	CLD; REPN; SCASB	// scan forward for AL, at most CX bytes
+	JZ 3(PC)	// match: skip the not-found return below
+	MOVL	$-1, ret+12(FP)
+	RET
+	SUBL	SI, DI	// distance scanned from the base
+	SUBL	$1, DI	// DI stopped one past the matching byte
+	MOVL	DI, ret+12(FP)
+	RET
diff --git a/src/internal/bytealg/indexbyte_amd64.s b/src/internal/bytealg/indexbyte_amd64.s
new file mode 100644
index 0000000..f78093c
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_amd64.s
@@ -0,0 +1,147 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT	·IndexByte(SB), NOSPLIT, $0-40
+	MOVQ b_base+0(FP), SI	// SI: data
+	MOVQ b_len+8(FP), BX	// BX: data len
+	MOVB c+24(FP), AL	// AL: byte sought
+	LEAQ ret+32(FP), R8	// R8: address to put result
+	JMP  indexbytebody<>(SB)
+
+TEXT	·IndexByteString(SB), NOSPLIT, $0-32
+	MOVQ s_base+0(FP), SI	// SI: data
+	MOVQ s_len+8(FP), BX	// BX: data len
+	MOVB c+16(FP), AL	// AL: byte sought
+	LEAQ ret+24(FP), R8	// R8: address to put result
+	JMP  indexbytebody<>(SB)
+
+// input:
+//   SI: data
+//   BX: data len
+//   AL: byte sought
+//   R8: address to put result
+TEXT	indexbytebody<>(SB), NOSPLIT, $0
+	// Shuffle X0 around so that each byte contains
+	// the character we're looking for.
+	MOVD AX, X0
+	PUNPCKLBW X0, X0
+	PUNPCKLBW X0, X0
+	PSHUFL $0, X0, X0
+
+	CMPQ BX, $16
+	JLT small	// fewer than 16 bytes: single masked load
+
+	MOVQ SI, DI	// DI = address of the current chunk
+
+	CMPQ BX, $32
+	JA avx2	// more than 32 bytes: use AVX2 if available
+sse:
+	LEAQ	-16(SI)(BX*1), AX	// AX = address of last 16 bytes
+	JMP	sseloopentry
+
+sseloop:
+	// Move the next 16-byte chunk of the data into X1.
+	MOVOU	(DI), X1
+	// Compare bytes in X0 to X1.
+	PCMPEQB	X0, X1
+	// Take the top bit of each byte in X1 and put the result in DX.
+	PMOVMSKB X1, DX
+	// Find first set bit, if any.
+	BSFL	DX, DX
+	JNZ	ssesuccess
+	// Advance to next block.
+	ADDQ	$16, DI
+sseloopentry:
+	CMPQ	DI, AX
+	JB	sseloop
+
+	// Search the last 16-byte chunk. This chunk may overlap with the
+	// chunks we've already searched, but that's ok.
+	MOVQ	AX, DI
+	MOVOU	(AX), X1
+	PCMPEQB	X0, X1
+	PMOVMSKB X1, DX
+	BSFL	DX, DX
+	JNZ	ssesuccess
+
+failure:
+	MOVQ $-1, (R8)
+	RET
+
+// We've found a chunk containing the byte.
+// The chunk was loaded from DI.
+// The index of the matching byte in the chunk is DX.
+// The start of the data is SI.
+ssesuccess:
+	SUBQ SI, DI	// Compute offset of chunk within data.
+	ADDQ DX, DI	// Add offset of byte within chunk.
+	MOVQ DI, (R8)
+	RET
+
+// handle for lengths < 16
+small:
+	TESTQ	BX, BX
+	JEQ	failure	// Empty data, failure.
+
+	// Check if we'll load across a page boundary.
+	LEAQ	16(SI), AX
+	TESTW	$0xff0, AX
+	JEQ	endofpage
+
+	MOVOU	(SI), X1 // Load data
+	PCMPEQB	X0, X1	// Compare target byte with each byte in data.
+	PMOVMSKB X1, DX	// Move result bits to integer register.
+	BSFL	DX, DX	// Find first set bit.
+	JZ	failure	// No set bit, failure.
+	CMPL	DX, BX
+	JAE	failure	// Match is past end of data.
+	MOVQ	DX, (R8)
+	RET
+
+endofpage:
+	MOVOU	-16(SI)(BX*1), X1	// Load data into the high end of X1.
+	PCMPEQB	X0, X1	// Compare target byte with each byte in data.
+	PMOVMSKB X1, DX	// Move result bits to integer register.
+	MOVL	BX, CX
+	SHLL	CX, DX	// Shift left by the data length.
+	SHRL	$16, DX	// Shift desired bits down to bottom of register.
+	BSFL	DX, DX	// Find first set bit.
+	JZ	failure	// No set bit, failure.
+	MOVQ	DX, (R8)
+	RET
+
+avx2:
+	CMPB   internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
+	JNE sse	// No AVX2: use the SSE loop instead.
+	MOVD AX, X0
+	LEAQ -32(SI)(BX*1), R11	// R11 = address of last 32 bytes
+	VPBROADCASTB  X0, Y1	// Y1 = byte sought in every lane
+avx2_loop:
+	VMOVDQU (DI), Y2	// Load the next 32-byte chunk.
+	VPCMPEQB Y1, Y2, Y3	// Compare target byte with each byte in chunk.
+	VPTEST Y3, Y3	// Any byte matched?
+	JNZ avx2success
+	ADDQ $32, DI	// Advance to next chunk.
+	CMPQ DI, R11
+	JLT avx2_loop
+	MOVQ R11, DI	// Search the last 32-byte chunk; it may overlap earlier ones.
+	VMOVDQU (DI), Y2
+	VPCMPEQB Y1, Y2, Y3
+	VPTEST Y3, Y3
+	JNZ avx2success
+	VZEROUPPER	// Zero the upper halves of the YMM registers.
+	MOVQ $-1, (R8)
+	RET
+
+avx2success:
+	VPMOVMSKB Y3, DX	// Move result bits to integer register.
+	BSFL DX, DX	// Find first set bit.
+	SUBQ SI, DI	// Compute offset of chunk within data.
+	ADDQ DI, DX	// Add offset of byte within chunk.
+	MOVQ DX, (R8)
+	VZEROUPPER	// Zero the upper halves of the YMM registers.
+	RET
diff --git a/src/internal/bytealg/indexbyte_arm.s b/src/internal/bytealg/indexbyte_arm.s
new file mode 100644
index 0000000..faf9797
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_arm.s
@@ -0,0 +1,46 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·IndexByte(SB),NOSPLIT,$0-20
+	MOVW	b_base+0(FP), R0	// data
+	MOVW	b_len+4(FP), R1	// data length
+	MOVBU	c+12(FP), R2	// byte to find
+	MOVW	$ret+16(FP), R5	// address to put result
+	B	indexbytebody<>(SB)
+
+TEXT ·IndexByteString(SB),NOSPLIT,$0-16
+	MOVW	s_base+0(FP), R0	// data
+	MOVW	s_len+4(FP), R1	// data length
+	MOVBU	c+8(FP), R2	// byte to find
+	MOVW	$ret+12(FP), R5	// address to put result
+	B	indexbytebody<>(SB)
+
+// input:
+//  R0: data
+//  R1: data length
+//  R2: byte to find
+//  R5: address to put result
+TEXT indexbytebody<>(SB),NOSPLIT,$0-0
+	MOVW	R0, R4		// store base for later
+	ADD	R0, R1		// end
+
+loop:
+	CMP	R0, R1
+	B.EQ	notfound	// reached end without a match
+	MOVBU.P	1(R0), R3	// load byte at R0, then advance R0 by 1
+	CMP	R2, R3
+	B.NE	loop		// not the byte we want, keep going
+
+	SUB	$1, R0		// R0 will be one beyond the position we want
+	SUB	R4, R0		// remove base
+	MOVW	R0, (R5)
+	RET
+
+notfound:
+	MOVW	$-1, R0
+	MOVW	R0, (R5)
+	RET
diff --git a/src/internal/bytealg/indexbyte_arm64.s b/src/internal/bytealg/indexbyte_arm64.s
new file mode 100644
index 0000000..40843fb
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_arm64.s
@@ -0,0 +1,126 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT ·IndexByte(SB),NOSPLIT,$0-40
+	MOVD	b_base+0(FP), R0	// data
+	MOVD	b_len+8(FP), R2	// data len
+	MOVBU	c+24(FP), R1	// byte to search
+	MOVD	$ret+32(FP), R8	// address to put result
+	B	indexbytebody<>(SB)
+
+TEXT ·IndexByteString(SB),NOSPLIT,$0-32
+	MOVD	s_base+0(FP), R0	// data
+	MOVD	s_len+8(FP), R2	// data len
+	MOVBU	c+16(FP), R1	// byte to search
+	MOVD	$ret+24(FP), R8	// address to put result
+	B	indexbytebody<>(SB)
+
+// input:
+//   R0: data
+//   R1: byte to search
+//   R2: data len
+//   R8: address to put result
+TEXT indexbytebody<>(SB),NOSPLIT,$0
+	// Core algorithm:
+	// For each 32-byte chunk we calculate a 64-bit syndrome value,
+	// with two bits per byte. For each tuple, bit 0 is set if the
+	// relevant byte matched the requested character and bit 1 is
+	// not used (faster than using a 32bit syndrome). Since the bits
+	// in the syndrome reflect exactly the order in which things occur
+	// in the original string, counting trailing zeros allows to
+	// identify exactly which byte has matched.
+
+	CBZ	R2, fail	// empty input: not found
+	MOVD	R0, R11	// R11 = start of data, used to compute the result offset
+	// Magic constant 0x40100401 allows us to identify
+	// which lane matches the requested byte.
+	// 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
+	// Different bytes have different bit masks (i.e: 1, 4, 16, 64)
+	MOVD	$0x40100401, R5
+	VMOV	R1, V0.B16	// replicate the requested byte into all 16 lanes
+	// Work with aligned 32-byte chunks
+	BIC	$0x1f, R0, R3
+	VMOV	R5, V5.S4	// replicate the magic constant into all 4 words
+	ANDS	$0x1f, R0, R9	// R9 = offset of data within its 32-byte chunk
+	AND	$0x1f, R2, R10	// R10 = data len mod 32
+	BEQ	loop	// data is already 32-byte aligned
+
+	// Input string is not 32-byte aligned. We calculate the
+	// syndrome value for the aligned 32 bytes block containing
+	// the first bytes and mask off the irrelevant part.
+	VLD1.P	(R3), [V1.B16, V2.B16]
+	SUB	$0x20, R9, R4
+	ADDS	R4, R2, R2
+	VCMEQ	V0.B16, V1.B16, V3.B16
+	VCMEQ	V0.B16, V2.B16, V4.B16
+	VAND	V5.B16, V3.B16, V3.B16
+	VAND	V5.B16, V4.B16, V4.B16
+	VADDP	V4.B16, V3.B16, V6.B16 // 256->128
+	VADDP	V6.B16, V6.B16, V6.B16 // 128->64
+	VMOV	V6.D[0], R6
+	// Clear the irrelevant lower bits
+	LSL	$1, R9, R4
+	LSR	R4, R6, R6
+	LSL	R4, R6, R6
+	// The first block can also be the last
+	BLS	masklast
+	// Have we found something already?
+	CBNZ	R6, tail
+
+loop:
+	VLD1.P	(R3), [V1.B16, V2.B16]
+	SUBS	$0x20, R2, R2
+	VCMEQ	V0.B16, V1.B16, V3.B16
+	VCMEQ	V0.B16, V2.B16, V4.B16
+	// If we're out of data we finish regardless of the result
+	BLS	end
+	// Use a fast check for the termination condition
+	VORR	V4.B16, V3.B16, V6.B16
+	VADDP	V6.D2, V6.D2, V6.D2
+	VMOV	V6.D[0], R6
+	// We're not out of data, loop if we haven't found the character
+	CBZ	R6, loop
+
+end:
+	// Termination condition found, let's calculate the syndrome value
+	VAND	V5.B16, V3.B16, V3.B16
+	VAND	V5.B16, V4.B16, V4.B16
+	VADDP	V4.B16, V3.B16, V6.B16
+	VADDP	V6.B16, V6.B16, V6.B16
+	VMOV	V6.D[0], R6
+	// Only do the clear for the last possible block with less than 32 bytes
+	// Condition flags come from SUBS in the loop
+	BHS	tail
+
+masklast:
+	// Clear the irrelevant upper bits
+	ADD	R9, R10, R4
+	AND	$0x1f, R4, R4
+	SUB	$0x20, R4, R4
+	NEG	R4<<1, R4
+	LSL	R4, R6, R6
+	LSR	R4, R6, R6
+
+tail:
+	// Check that we have found a character
+	CBZ	R6, fail
+	// Count the trailing zeros using bit reversing
+	RBIT	R6, R6
+	// Compensate the last post-increment
+	SUB	$0x20, R3, R3
+	// And count the leading zeros
+	CLZ	R6, R6
+	// R6 is twice the offset into the fragment
+	ADD	R6>>1, R3, R0
+	// Compute the offset result
+	SUB	R11, R0, R0
+	MOVD	R0, (R8)
+	RET
+
+fail:
+	MOVD	$-1, R0
+	MOVD	R0, (R8)
+	RET
diff --git a/src/internal/bytealg/indexbyte_generic.go b/src/internal/bytealg/indexbyte_generic.go
new file mode 100644
index 0000000..6ef639f
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_generic.go
@@ -0,0 +1,26 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64 && !s390x && !arm && !arm64 && !ppc64 && !ppc64le && !mips && !mipsle && !mips64 && !mips64le && !riscv64 && !wasm
+// +build !386,!amd64,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!mips64,!mips64le,!riscv64,!wasm
+
+package bytealg
+
+// IndexByte returns the index of the first occurrence of c in b,
+// or -1 if c does not occur in b.
+func IndexByte(b []byte, c byte) int {
+	for pos := 0; pos < len(b); pos++ {
+		if b[pos] != c {
+			continue
+		}
+		return pos
+	}
+	return -1
+}
+
+// IndexByteString returns the index of the first occurrence of c in s,
+// or -1 if c does not occur in s. The string is examined byte by byte,
+// not rune by rune.
+func IndexByteString(s string, c byte) int {
+	pos := 0
+	for pos < len(s) && s[pos] != c {
+		pos++
+	}
+	if pos == len(s) {
+		return -1
+	}
+	return pos
+}
diff --git a/src/internal/bytealg/indexbyte_mips64x.s b/src/internal/bytealg/indexbyte_mips64x.s
new file mode 100644
index 0000000..0f377f5
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_mips64x.s
@@ -0,0 +1,55 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func IndexByte(b []byte, c byte) int
+//
+// Scans b one byte at a time and stores in ret the index of the first byte
+// equal to c, or -1 if the end of the slice is reached first.
+TEXT ·IndexByte(SB),NOSPLIT,$0-40
+	MOVV	b_base+0(FP), R1
+	MOVV	b_len+8(FP), R2
+	MOVBU	c+24(FP), R3	// byte to find
+	MOVV	R1, R4		// store base for later
+	ADDV	R1, R2		// end
+	// Start one byte before the base: the loop increments before it tests.
+	ADDV	$-1, R1
+
+loop:
+	ADDV	$1, R1
+	// Reaching the end address means no byte matched.
+	BEQ	R1, R2, notfound
+	MOVBU	(R1), R5
+	BNE	R3, R5, loop
+
+	SUBV	R4, R1		// remove base
+	MOVV	R1, ret+32(FP)
+	RET
+
+notfound:
+	MOVV	$-1, R1
+	MOVV	R1, ret+32(FP)
+	RET
+
+// func IndexByteString(s string, c byte) int
+//
+// Scans s one byte at a time and stores in ret the index of the first byte
+// equal to c, or -1 if the end of the string is reached first.
+TEXT ·IndexByteString(SB),NOSPLIT,$0-32
+	MOVV	s_base+0(FP), R1
+	MOVV	s_len+8(FP), R2
+	MOVBU	c+16(FP), R3	// byte to find
+	MOVV	R1, R4		// store base for later
+	ADDV	R1, R2		// end
+	// Start one byte before the base: the loop increments before it tests.
+	ADDV	$-1, R1
+
+loop:
+	ADDV	$1, R1
+	// Reaching the end address means no byte matched.
+	BEQ	R1, R2, notfound
+	MOVBU	(R1), R5
+	BNE	R3, R5, loop
+
+	SUBV	R4, R1		// remove base
+	MOVV	R1, ret+24(FP)
+	RET
+
+notfound:
+	MOVV	$-1, R1
+	MOVV	R1, ret+24(FP)
+	RET
diff --git a/src/internal/bytealg/indexbyte_mipsx.s b/src/internal/bytealg/indexbyte_mipsx.s
new file mode 100644
index 0000000..bed015b
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_mipsx.s
@@ -0,0 +1,53 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+// +build mips mipsle
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func IndexByte(b []byte, c byte) int
+//
+// Scans b one byte at a time and stores in ret the index of the first byte
+// equal to c, or -1 if the end of the slice is reached first.
+TEXT ·IndexByte(SB),NOSPLIT,$0-20
+	MOVW	b_base+0(FP), R1
+	MOVW	b_len+4(FP), R2
+	MOVBU	c+12(FP), R3	// byte to find
+	ADDU	$1, R1, R4	// store base+1 for later
+	ADDU	R1, R2	// end
+
+loop:
+	// Reaching the end address means no byte matched.
+	BEQ	R1, R2, notfound
+	MOVBU	(R1), R5
+	// The pointer is advanced before the comparison, so on a match R1 is
+	// already one past the matching byte.
+	ADDU	$1, R1
+	BNE	R3, R5, loop
+
+	SUBU	R4, R1	// R1 will be one beyond the position we want so remove (base+1)
+	MOVW	R1, ret+16(FP)
+	RET
+
+notfound:
+	MOVW	$-1, R1
+	MOVW	R1, ret+16(FP)
+	RET
+
+// func IndexByteString(s string, c byte) int
+//
+// Scans s one byte at a time and stores in ret the index of the first byte
+// equal to c, or -1 if the end of the string is reached first.
+TEXT ·IndexByteString(SB),NOSPLIT,$0-16
+	MOVW	s_base+0(FP), R1
+	MOVW	s_len+4(FP), R2
+	MOVBU	c+8(FP), R3	// byte to find
+	ADDU	$1, R1, R4	// store base+1 for later
+	ADDU	R1, R2	// end
+
+loop:
+	// Reaching the end address means no byte matched.
+	BEQ	R1, R2, notfound
+	MOVBU	(R1), R5
+	// The pointer is advanced before the comparison, so on a match R1 is
+	// already one past the matching byte.
+	ADDU	$1, R1
+	BNE	R3, R5, loop
+
+	SUBU	R4, R1	// remove (base+1)
+	MOVW	R1, ret+12(FP)
+	RET
+
+notfound:
+	MOVW	$-1, R1
+	MOVW	R1, ret+12(FP)
+	RET
diff --git a/src/internal/bytealg/indexbyte_native.go b/src/internal/bytealg/indexbyte_native.go
new file mode 100644
index 0000000..965f38f
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_native.go
@@ -0,0 +1,14 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64 || s390x || arm || arm64 || ppc64 || ppc64le || mips || mipsle || mips64 || mips64le || riscv64 || wasm
+// +build 386 amd64 s390x arm arm64 ppc64 ppc64le mips mipsle mips64 mips64le riscv64 wasm
+
+package bytealg
+
+// IndexByte returns the index of the first byte in b equal to c, or -1 if
+// there is none. It has no Go body on the architectures selected by this
+// file's build constraint.
+//
+//go:noescape
+func IndexByte(b []byte, c byte) int
+
+// IndexByteString returns the index of the first byte in s equal to c, or -1
+// if there is none. It has no Go body on the architectures selected by this
+// file's build constraint.
+//
+//go:noescape
+func IndexByteString(s string, c byte) int
diff --git a/src/internal/bytealg/indexbyte_ppc64x.s b/src/internal/bytealg/indexbyte_ppc64x.s
new file mode 100644
index 0000000..8e13c5a
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_ppc64x.s
@@ -0,0 +1,316 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func IndexByte(b []byte, c byte) int
+//
+// Loads the arguments into the registers indexbytebody expects and branches
+// to it; indexbytebody stores the result through R14 and returns.
+TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
+	MOVD	b_base+0(FP), R3	// R3 = byte array pointer
+	MOVD	b_len+8(FP), R4		// R4 = length
+	MOVBZ	c+24(FP), R5		// R5 = byte
+	MOVD	$ret+32(FP), R14	// R14 = &ret
+	BR	indexbytebody<>(SB)
+
+// func IndexByteString(s string, c byte) int
+//
+// Loads the arguments into the registers indexbytebody expects and branches
+// to it; indexbytebody stores the result through R14 and returns.
+TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
+	MOVD	s_base+0(FP), R3  // R3 = string
+	MOVD	s_len+8(FP), R4	  // R4 = length
+	MOVBZ	c+16(FP), R5	  // R5 = byte
+	MOVD	$ret+24(FP), R14  // R14 = &ret
+	BR	indexbytebody<>(SB)
+
+// indexbytebody is the shared implementation behind IndexByte and
+// IndexByteString.
+//
+// input:
+//   R3: base address of the data to search
+//   R4: length in bytes
+//   R5: byte to search for
+//   R14: address the result is stored to
+//
+// The index of the first matching byte, or -1 if there is none, is written
+// to (R14). Inputs of at most 32 bytes are handled a doubleword at a time
+// (small_string); longer inputs are aligned and then searched with vector
+// loads, 64 bytes per iteration in the main loop.
+TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD	R3,R17		// Save base address for calculating the index later.
+	RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
+	RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
+	ADD	R4,R3,R7	// Last acceptable address in R7.
+	DCBT	(R8)		// Prepare cache line.
+
+	RLDIMI	$16,R5,$32,R5
+	CMPU	R4,$32		// Check if it's a small string (≤32 bytes). Those will be processed differently.
+	MOVD	$-1,R9
+	WORD	$0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
+	RLDIMI	$32,R5,$0,R5
+	MOVD	R7,R10		// Save last acceptable address in R10 for later.
+	ADD	$-1,R7,R7
+#ifdef GOARCH_ppc64le
+	SLD	R6,R9,R9	// Prepare mask for Little Endian
+#else
+	SRD	R6,R9,R9	// Same for Big Endian
+#endif
+	BLE	small_string	// Jump to the small string case if it's ≤32 bytes.
+
+	// If we are 64-byte aligned, branch to qw_align just to get the auxiliary values
+	// in V0, V1 and V10, then branch to the preloop.
+	ANDCC	$63,R3,R11
+	BEQ	CR0,qw_align
+	RLDICL	$0,R3,$61,R11
+
+	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
+	CMPB	R12,R5,R3	// Check for a match.
+	AND	R9,R3,R3	// Mask bytes below s_base
+	RLDICL	$0,R7,$61,R6	// length-1
+	RLDICR	$0,R7,$60,R7	// Last doubleword in R7
+	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
+	BNE	CR7,done
+	ADD	$8,R8,R8
+	ADD	$-8,R4,R4
+	ADD	R4,R11,R4
+
+	// Check for quadword alignment
+	ANDCC	$15,R8,R11
+	BEQ	CR0,qw_align
+
+	// Not aligned, so handle the next doubleword
+	MOVD	0(R8),R12
+	CMPB	R12,R5,R3
+	CMPU	R3,$0,CR7
+	BNE	CR7,done
+	ADD	$8,R8,R8
+	ADD	$-8,R4,R4
+
+	// Either quadword aligned or 64-byte at this point. We can use LVX.
+qw_align:
+
+	// Set up auxiliary data for the vectorized algorithm.
+	VSPLTISB  $0,V0		// Replicate 0 across V0
+	VSPLTISB  $3,V10	// Use V10 as control for VBPERMQ
+	MTVRD	  R5,V1
+	LVSL	  (R0+R0),V11
+	VSLB	  V11,V10,V10
+	VSPLTB	  $7,V1,V1	// Replicate byte across V1
+	CMPU	  R4, $64	// If len ≤ 64, don't use the vectorized loop
+	BLE	  tail
+
+	// We will load 4 quadwords per iteration in the loop, so check for
+	// 64-byte alignment. If 64-byte aligned, then branch to the preloop.
+	ANDCC	  $63,R8,R11
+	BEQ	  CR0,preloop
+
+	// Not 64-byte aligned. Load one quadword at a time until aligned.
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
+	BNE	    CR6,found_qw_align
+	ADD	    $16,R8,R8
+	ADD	    $-16,R4,R4
+
+	ANDCC	    $63,R8,R11
+	BEQ	    CR0,preloop
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
+	BNE	    CR6,found_qw_align
+	ADD	    $16,R8,R8
+	ADD	    $-16,R4,R4
+
+	ANDCC	    $63,R8,R11
+	BEQ	    CR0,preloop
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
+	BNE	    CR6,found_qw_align
+	ADD	    $-16,R4,R4
+	ADD	    $16,R8,R8
+
+	// 64-byte aligned. Prepare for the main loop.
+preloop:
+	CMPU	R4,$64
+	BLE	tail	      // If len ≤ 64, don't use the vectorized loop
+
+	// We are now aligned to a 64-byte boundary. We will load 4 quadwords
+	// per loop iteration. The last doubleword is in R10, so our loop counter
+	// starts at (R10-R8)/64.
+	SUB	R8,R10,R6
+	SRD	$6,R6,R9      // Loop counter in R9
+	MOVD	R9,CTR
+
+	ADD	$-64,R8,R8   // Adjust index for loop entry
+	MOVD	$16,R11      // Load offsets for the vector loads
+	MOVD	$32,R9
+	MOVD	$48,R7
+
+	// Main loop we will load 64 bytes per iteration
+loop:
+	ADD	    $64,R8,R8	      // Fuse addi+lvx for performance
+	LVX	    (R8+R0),V2	      // Load 4 16-byte vectors
+	LVX	    (R8+R11),V3
+	VCMPEQUB    V1,V2,V6	      // Look for byte in each vector
+	VCMPEQUB    V1,V3,V7
+
+	LVX	    (R8+R9),V4
+	LVX	    (R8+R7),V5
+	VCMPEQUB    V1,V4,V8
+	VCMPEQUB    V1,V5,V9
+
+	VOR	    V6,V7,V11	      // Compress the result in a single vector
+	VOR	    V8,V9,V12
+	VOR	    V11,V12,V13
+	VCMPEQUBCC  V0,V13,V14	      // Check for byte
+	BGE	    CR6,found
+	BC	    16,0,loop	      // bdnz loop
+
+	// Handle the trailing bytes or R4 ≤ 64
+	RLDICL	$0,R6,$58,R4
+	ADD	$64,R8,R8
+tail:
+	CMPU	    R4,$0
+	BEQ	    notfound
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6
+	BNE	    CR6,found_qw_align
+	ADD	    $16,R8,R8
+	CMPU	    R4,$16,CR6
+	BLE	    CR6,notfound
+	ADD	    $-16,R4,R4
+
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6
+	BNE	    CR6,found_qw_align
+	ADD	    $16,R8,R8
+	CMPU	    R4,$16,CR6
+	BLE	    CR6,notfound
+	ADD	    $-16,R4,R4
+
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6
+	BNE	    CR6,found_qw_align
+	ADD	    $16,R8,R8
+	CMPU	    R4,$16,CR6
+	BLE	    CR6,notfound
+	ADD	    $-16,R4,R4
+
+	LVX	    (R8+R0),V4
+	VCMPEQUBCC  V1,V4,V6
+	BNE	    CR6,found_qw_align
+
+notfound:
+	MOVD	$-1,R3
+	MOVD	R3,(R14)
+	RET
+
+found:
+	// We will now compress the results into a single doubleword,
+	// so it can be moved to a GPR for the final index calculation.
+
+	// The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the
+	// first bit of each byte into bits 48-63.
+	VBPERMQ	  V6,V10,V6
+	VBPERMQ	  V7,V10,V7
+	VBPERMQ	  V8,V10,V8
+	VBPERMQ	  V9,V10,V9
+
+	// Shift each 16-bit component into its correct position for
+	// merging into a single doubleword.
+#ifdef GOARCH_ppc64le
+	VSLDOI	  $2,V7,V7,V7
+	VSLDOI	  $4,V8,V8,V8
+	VSLDOI	  $6,V9,V9,V9
+#else
+	VSLDOI	  $6,V6,V6,V6
+	VSLDOI	  $4,V7,V7,V7
+	VSLDOI	  $2,V8,V8,V8
+#endif
+
+	// Merge V6-V9 into a single doubleword and move to a GPR.
+	VOR	V6,V7,V11
+	VOR	V8,V9,V4
+	VOR	V4,V11,V4
+	MFVRD	V4,R3
+
+#ifdef GOARCH_ppc64le
+	ADD	  $-1,R3,R11
+	ANDN	  R3,R11,R11
+	POPCNTD	  R11,R11	// Count trailing zeros (Little Endian).
+#else
+	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
+#endif
+	ADD	R8,R11,R3	// Calculate byte address
+
+return:
+	// R3 holds the address of the match; subtracting the saved base (R17)
+	// turns it into the index that is stored through R14.
+	SUB	R17,R3
+	MOVD	R3,(R14)
+	RET
+
+found_qw_align:
+	// Use the same algorithm as above. Compress the result into
+	// a single doubleword and move it to a GPR for the final
+	// calculation.
+	VBPERMQ	  V6,V10,V6
+
+#ifdef GOARCH_ppc64le
+	MFVRD	  V6,R3
+	ADD	  $-1,R3,R11
+	ANDN	  R3,R11,R11
+	POPCNTD	  R11,R11
+#else
+	VSLDOI	  $6,V6,V6,V6
+	MFVRD	  V6,R3
+	CNTLZD	  R3,R11
+#endif
+	ADD	  R8,R11,R3
+	// Only report the match if its offset within the quadword is below the
+	// count in R4; otherwise it is treated as not found.
+	CMPU	  R11,R4
+	BLT	  return
+	BR	  notfound
+
+done:
+	// At this point, R3 has 0xFF in the same position as the byte we are
+	// looking for in the doubleword. Use that to calculate the exact index
+	// of the byte.
+#ifdef GOARCH_ppc64le
+	ADD	$-1,R3,R11
+	ANDN	R3,R11,R11
+	POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
+#else
+	CNTLZD	R3,R11		// Count leading zeros (Big Endian).
+#endif
+	CMPU	R8,R7		// Check if we are at the last doubleword.
+	SRD	$3,R11		// Convert trailing zeros to bytes.
+	ADD	R11,R8,R3
+	CMPU	R11,R6,CR7	// If at the last doubleword, check the byte offset.
+	BNE	return
+	BLE	CR7,return
+	BR	notfound
+
+small_string:
+	// We unroll this loop for better performance.
+	CMPU	R4,$0		// Check for length=0
+	BEQ	notfound
+
+	MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
+	CMPB	R12,R5,R3	// Check for a match.
+	AND	R9,R3,R3	// Mask bytes below s_base.
+	CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
+	RLDICL	$0,R7,$61,R6	// length-1
+	RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
+	CMPU	R8,R7
+	BNE	CR7,done
+	BEQ	notfound	// Hit length.
+
+	MOVDU	8(R8),R12
+	CMPB	R12,R5,R3
+	CMPU	R3,$0,CR6
+	CMPU	R8,R7
+	BNE	CR6,done
+	BEQ	notfound
+
+	MOVDU	8(R8),R12
+	CMPB	R12,R5,R3
+	CMPU	R3,$0,CR6
+	CMPU	R8,R7
+	BNE	CR6,done
+	BEQ	notfound
+
+	MOVDU	8(R8),R12
+	CMPB	R12,R5,R3
+	CMPU	R3,$0,CR6
+	CMPU	R8,R7
+	BNE	CR6,done
+	BEQ	notfound
+
+	MOVDU	8(R8),R12
+	CMPB	R12,R5,R3
+	CMPU	R3,$0,CR6
+	BNE	CR6,done
+	BR	notfound
+
diff --git a/src/internal/bytealg/indexbyte_riscv64.s b/src/internal/bytealg/indexbyte_riscv64.s
new file mode 100644
index 0000000..156c303
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_riscv64.s
@@ -0,0 +1,52 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func IndexByte(b []byte, c byte) int
+//
+// Scans b one byte at a time and stores in ret the index of the first byte
+// equal to c, or -1 if the end of the slice is reached first.
+TEXT ·IndexByte(SB),NOSPLIT,$0-40
+	MOV	b_base+0(FP), A1
+	MOV	b_len+8(FP), A2
+	MOVBU	c+24(FP), A3	// byte to find
+	MOV	A1, A4		// store base for later
+	ADD	A1, A2		// end
+	// Start one byte before the base: the loop increments before it tests.
+	ADD	$-1, A1
+
+loop:
+	ADD	$1, A1
+	// Reaching the end address means no byte matched.
+	BEQ	A1, A2, notfound
+	MOVBU	(A1), A5
+	BNE	A3, A5, loop
+
+	SUB	A4, A1		// remove base
+	MOV	A1, ret+32(FP)
+	RET
+
+notfound:
+	MOV	$-1, A1
+	MOV	A1, ret+32(FP)
+	RET
+
+// func IndexByteString(s string, c byte) int
+//
+// Scans s one byte at a time and stores in ret the index of the first byte
+// equal to c, or -1 if the end of the string is reached first.
+TEXT ·IndexByteString(SB),NOSPLIT,$0-32
+	MOV	s_base+0(FP), A1
+	MOV	s_len+8(FP), A2
+	MOVBU	c+16(FP), A3	// byte to find
+	MOV	A1, A4		// store base for later
+	ADD	A1, A2		// end
+	// Start one byte before the base: the loop increments before it tests.
+	ADD	$-1, A1
+
+loop:
+	ADD	$1, A1
+	// Reaching the end address means no byte matched.
+	BEQ	A1, A2, notfound
+	MOVBU	(A1), A5
+	BNE	A3, A5, loop
+
+	SUB	A4, A1		// remove base
+	MOV	A1, ret+24(FP)
+	RET
+
+notfound:
+	MOV	$-1, A1
+	MOV	A1, ret+24(FP)
+	RET
diff --git a/src/internal/bytealg/indexbyte_s390x.s b/src/internal/bytealg/indexbyte_s390x.s
new file mode 100644
index 0000000..cf88d92
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_s390x.s
@@ -0,0 +1,108 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func IndexByte(b []byte, c byte) int
+//
+// Loads the arguments into the registers indexbytebody expects and branches
+// to it; indexbytebody stores the result through R2 and returns.
+TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
+	MOVD	b_base+0(FP), R3// b_base => R3
+	MOVD	b_len+8(FP), R4 // b_len => R4
+	MOVBZ	c+24(FP), R5    // c => R5
+	MOVD	$ret+32(FP), R2 // &ret => R2
+	BR	indexbytebody<>(SB)
+
+// func IndexByteString(s string, c byte) int
+//
+// Loads the arguments into the registers indexbytebody expects and branches
+// to it; indexbytebody stores the result through R2 and returns.
+TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
+	MOVD	s_base+0(FP), R3// s_base => R3
+	MOVD	s_len+8(FP), R4 // s_len => R4
+	MOVBZ	c+16(FP), R5    // c => R5
+	MOVD	$ret+24(FP), R2 // &ret => R2
+	BR	indexbytebody<>(SB)
+
+// indexbytebody is the shared implementation behind IndexByte and
+// IndexByteString.
+//
+// input:
+// R3: s
+// R4: s_len
+// R5: c -- byte sought
+// R2: &ret -- address to put index into
+//
+// The index of the first byte equal to c, or -1 if there is none, is stored
+// at 0(R2). Inputs shorter than 16 bytes use a byte loop; longer inputs use
+// the vector facility when internal/cpu reports it, and SRST otherwise.
+TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
+	CMPBEQ	R4, $0, notfound
+	MOVD	R3, R6          // store base for later
+	ADD	R3, R4, R8      // the address after the end of the string
+	//if the length is small, use loop; otherwise, use vector or srst search
+	CMPBGE	R4, $16, large
+
+residual:
+	// Byte-at-a-time scan of [R3, R8).
+	CMPBEQ	R3, R8, notfound
+	MOVBZ	0(R3), R7
+	LA	1(R3), R3
+	CMPBNE	R7, R5, residual
+
+found:
+	// R3 was advanced past the matching byte, so subtract the base and
+	// one more to obtain the index.
+	SUB	R6, R3
+	SUB	$1, R3
+	MOVD	R3, 0(R2)
+	RET
+
+notfound:
+	MOVD	$-1, 0(R2)
+	RET
+
+large:
+	MOVBZ	internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1
+	CMPBNE	R1, $0, vectorimpl
+
+srstimpl:                       // no vector facility
+	MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
+srstloop:
+	WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
+	BVS	srstloop        // interrupted - continue
+	BGT	notfoundr0
+foundr0:
+	// On a match R8 holds the address of the matching byte.
+	XOR	R0, R0          // reset R0
+	SUB	R6, R8          // remove base
+	MOVD	R8, 0(R2)
+	RET
+notfoundr0:
+	XOR	R0, R0          // reset R0
+	MOVD	$-1, 0(R2)
+	RET
+
+vectorimpl:
+	//if the address is not 16byte aligned, use loop for the header
+	MOVD	R3, R8
+	AND	$15, R8
+	CMPBGT	R8, $0, notaligned
+
+aligned:
+	// R8 = end address again (it was reused as scratch above);
+	// R7 = end address rounded down to a multiple of 16.
+	ADD	R6, R4, R8
+	MOVD	R8, R7
+	AND	$-16, R7
+	// replicate c across V17
+	VLVGB	$0, R5, V19
+	VREPB	$0, V19, V17
+
+vectorloop:
+	// Once fewer than 16 bytes remain before R7, finish with the byte loop.
+	CMPBGE	R3, R7, residual
+	VL	0(R3), V16    // load string to be searched into V16
+	ADD	$16, R3
+	VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
+	BVS	vectorloop
+
+	// when vector search found c in the string
+	VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
+	// Undo the post-increment and remove the base, then add the offset
+	// of the match within the 16-byte chunk.
+	SUB	$16, R3
+	SUB	R6, R3
+	ADD	R3, R7
+	MOVD	R7, 0(R2)
+	RET
+
+notaligned:
+	// R8 = next 16-byte boundary above R3.
+	MOVD	R3, R8
+	AND	$-16, R8
+	ADD     $16, R8
+notalignedloop:
+	CMPBEQ	R3, R8, aligned
+	MOVBZ	0(R3), R7
+	LA	1(R3), R3
+	CMPBNE	R7, R5, notalignedloop
+	BR	found
diff --git a/src/internal/bytealg/indexbyte_wasm.s b/src/internal/bytealg/indexbyte_wasm.s
new file mode 100644
index 0000000..ef4bd93
--- /dev/null
+++ b/src/internal/bytealg/indexbyte_wasm.s
@@ -0,0 +1,195 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func IndexByte(b []byte, c byte) int
+//
+// Calls memchr with (b_base, c, b_len) and converts the value it returns
+// into the result: -1 when memchr returns 0, otherwise the returned value
+// minus b_base.
+TEXT ·IndexByte(SB), NOSPLIT, $0-40
+	I64Load b_base+0(FP)
+	I32WrapI64
+	I32Load8U c+24(FP)
+	I64Load b_len+8(FP)
+	I32WrapI64
+	Call memchr<>(SB)
+	I64ExtendI32S
+	Set R0
+
+	// Select yields -1 when R0 == 0 and R0 - b_base otherwise.
+	Get SP
+	I64Const $-1
+	Get R0
+	I64Load b_base+0(FP)
+	I64Sub
+	Get R0
+	I64Eqz $0
+	Select
+	I64Store ret+32(FP)
+
+	RET
+
+// func IndexByteString(s string, c byte) int
+//
+// Calls memchr with (s_base, c, s_len) and converts the value it returns
+// into the result: -1 when memchr returns 0, otherwise the returned value
+// minus s_base.
+TEXT ·IndexByteString(SB), NOSPLIT, $0-32
+	Get SP
+	I64Load s_base+0(FP)
+	I32WrapI64
+	I32Load8U c+16(FP)
+	I64Load s_len+8(FP)
+	I32WrapI64
+	Call memchr<>(SB)
+	I64ExtendI32S
+	Set R0
+
+	// Select yields -1 when R0 == 0 and R0 - s_base otherwise.
+	I64Const $-1
+	Get R0
+	I64Load s_base+0(FP)
+	I64Sub
+	Get R0
+	I64Eqz $0
+	Select
+	I64Store ret+24(FP)
+
+	RET
+
+// initially compiled with emscripten and then modified over time.
+// params:
+//   R0: s
+//   R1: c
+//   R2: len
+// ret: the address of the first byte equal to c, or 0 if there is none
+//      (the callers subtract the base address to obtain an index).
+//
+// R1 is reused as the count of bytes still to examine; it is set to 0 on
+// every path that runs out of data, and the final Select returns R0 only
+// when R1 is non-zero.
+TEXT memchr<>(SB), NOSPLIT, $0
+	Get R1
+	Set R4
+	Block
+		Block
+			// Byte loop while len != 0 and the address is not a multiple of 4.
+			Get R2
+			I32Const $0
+			I32Ne
+			Tee R3
+			Get R0
+			I32Const $3
+			I32And
+			I32Const $0
+			I32Ne
+			I32And
+			If
+				Loop
+					Get R0
+					I32Load8U $0
+					Get R1
+					I32Eq
+					BrIf $2
+					Get R2
+					I32Const $-1
+					I32Add
+					Tee R2
+					I32Const $0
+					I32Ne
+					Tee R3
+					Get R0
+					I32Const $1
+					I32Add
+					Tee R0
+					I32Const $3
+					I32And
+					I32Const $0
+					I32Ne
+					I32And
+					BrIf $0
+				End
+			End
+			Get R3
+			BrIf $0
+			I32Const $0
+			Set R1
+			Br $1
+		End
+		Get R0
+		I32Load8U $0
+		Get R4
+		Tee R3
+		I32Eq
+		If
+			Get R2
+			Set R1
+		Else
+			// 16843009 is 0x01010101: the multiply copies c into all
+			// four bytes of R4.
+			Get R4
+			I32Const $16843009
+			I32Mul
+			Set R4
+			Block
+				Block
+					Get R2
+					I32Const $3
+					I32GtU
+					If
+						Get R2
+						Set R1
+						Loop
+							// x = word ^ pattern has a zero byte exactly where the
+							// word contains c. With -2139062144 = 0x80808080 and
+							// -16843009 = -0x01010101, the expression below is
+							// (~x & 0x80808080) & (x - 0x01010101), which is zero
+							// when x has no zero byte.
+							Get R0
+							I32Load $0
+							Get R4
+							I32Xor
+							Tee R2
+							I32Const $-2139062144
+							I32And
+							I32Const $-2139062144
+							I32Xor
+							Get R2
+							I32Const $-16843009
+							I32Add
+							I32And
+							I32Eqz
+							If
+								Get R0
+								I32Const $4
+								I32Add
+								Set R0
+								Get R1
+								I32Const $-4
+								I32Add
+								Tee R1
+								I32Const $3
+								I32GtU
+								BrIf $1
+								Br $3
+							End
+						End
+					Else
+						Get R2
+						Set R1
+						Br $1
+					End
+					Br $1
+				End
+				Get R1
+				I32Eqz
+				If
+					I32Const $0
+					Set R1
+					Br $3
+				End
+			End
+			// Byte loop over the remaining R1 bytes.
+			Loop
+				Get R0
+				I32Load8U $0
+				Get R3
+				I32Eq
+				BrIf $2
+				Get R0
+				I32Const $1
+				I32Add
+				Set R0
+				Get R1
+				I32Const $-1
+				I32Add
+				Tee R1
+				BrIf $0
+				I32Const $0
+				Set R1
+			End
+		End
+	End
+	// Result: R0 if R1 != 0, otherwise 0.
+	Get R0
+	I32Const $0
+	Get R1
+	Select
+	Return
diff --git a/src/internal/cfg/cfg.go b/src/internal/cfg/cfg.go
new file mode 100644
index 0000000..815994b
--- /dev/null
+++ b/src/internal/cfg/cfg.go
@@ -0,0 +1,66 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cfg holds configuration shared by the Go command and internal/testenv.
+// Definitions that don't need to be exposed outside of cmd/go should be in
+// cmd/go/internal/cfg instead of this package.
+package cfg
+
+// KnownEnv is a list of environment variables that affect the operation
+// of the Go command.
+//
+// The list is a single raw string holding one variable name per line, each
+// line indented with a tab; consumers must split it on whitespace themselves.
+const KnownEnv = `
+	AR
+	CC
+	CGO_CFLAGS
+	CGO_CFLAGS_ALLOW
+	CGO_CFLAGS_DISALLOW
+	CGO_CPPFLAGS
+	CGO_CPPFLAGS_ALLOW
+	CGO_CPPFLAGS_DISALLOW
+	CGO_CXXFLAGS
+	CGO_CXXFLAGS_ALLOW
+	CGO_CXXFLAGS_DISALLOW
+	CGO_ENABLED
+	CGO_FFLAGS
+	CGO_FFLAGS_ALLOW
+	CGO_FFLAGS_DISALLOW
+	CGO_LDFLAGS
+	CGO_LDFLAGS_ALLOW
+	CGO_LDFLAGS_DISALLOW
+	CXX
+	FC
+	GCCGO
+	GO111MODULE
+	GO386
+	GOARCH
+	GOARM
+	GOBIN
+	GOCACHE
+	GOENV
+	GOEXE
+	GOEXPERIMENT
+	GOFLAGS
+	GOGCCFLAGS
+	GOHOSTARCH
+	GOHOSTOS
+	GOINSECURE
+	GOMIPS
+	GOMIPS64
+	GOMODCACHE
+	GONOPROXY
+	GONOSUMDB
+	GOOS
+	GOPATH
+	GOPPC64
+	GOPRIVATE
+	GOPROXY
+	GOROOT
+	GOSUMDB
+	GOTMPDIR
+	GOTOOLDIR
+	GOVCS
+	GOWASM
+	GO_EXTLINK_ENABLED
+	PKG_CONFIG
+`
diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go
new file mode 100644
index 0000000..dab5d06
--- /dev/null
+++ b/src/internal/cpu/cpu.go
@@ -0,0 +1,226 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cpu implements processor feature detection
+// used by the Go standard library.
+package cpu
+
+// DebugOptions is set to true by the runtime if the OS supports reading
+// GODEBUG early in runtime startup.
+// This should not be changed after it is initialized.
+var DebugOptions bool
+
+// CacheLinePad is used to pad structs to avoid false sharing.
+// It is CacheLinePadSize bytes large and has no usable fields.
+type CacheLinePad struct{ _ [CacheLinePadSize]byte }
+
+// CacheLineSize is the CPU's assumed cache line size.
+// There is currently no runtime detection of the real cache line size
+// so we use the constant per GOARCH CacheLinePadSize as an approximation.
+var CacheLineSize uintptr = CacheLinePadSize
+
+// The booleans in X86 contain the correspondingly named cpuid feature bit.
+// HasAVX and HasAVX2 are only set if the OS supports XMM and YMM registers
+// in addition to the cpuid feature bit being set.
+// The struct is padded to avoid false sharing.
+var X86 struct {
+	_            CacheLinePad
+	HasAES       bool
+	HasADX       bool
+	HasAVX       bool
+	HasAVX2      bool
+	HasBMI1      bool
+	HasBMI2      bool
+	HasERMS      bool
+	HasFMA       bool
+	HasOSXSAVE   bool
+	HasPCLMULQDQ bool
+	HasPOPCNT    bool
+	HasSSE2      bool
+	HasSSE3      bool
+	HasSSSE3     bool
+	HasSSE41     bool
+	HasSSE42     bool
+	_            CacheLinePad
+}
+
+// The booleans in ARM contain the correspondingly named cpu feature bit.
+// On arm, doinit derives them from the HWCap bits and registers them as the
+// GODEBUG cpu options "vfpv4" and "idiva".
+// The struct is padded to avoid false sharing.
+var ARM struct {
+	_        CacheLinePad
+	HasVFPv4 bool
+	HasIDIVA bool
+	_        CacheLinePad
+}
+
+// The booleans in ARM64 contain the correspondingly named cpu feature bit.
+// On arm64, doinit registers every field as a GODEBUG cpu option and leaves
+// the detection itself to the operating system specific osInit.
+// The struct is padded to avoid false sharing.
+var ARM64 struct {
+	_            CacheLinePad
+	HasAES       bool
+	HasPMULL     bool
+	HasSHA1      bool
+	HasSHA2      bool
+	HasCRC32     bool
+	HasATOMICS   bool
+	HasCPUID     bool
+	IsNeoverseN1 bool
+	IsZeus       bool
+	_            CacheLinePad
+}
+
+// The booleans in MIPS64X contain the correspondingly named cpu feature bit.
+// The struct is padded to avoid false sharing.
+var MIPS64X struct {
+	_      CacheLinePad
+	HasMSA bool // MIPS SIMD architecture
+	_      CacheLinePad
+}
+
+// The booleans in PPC64 describe the ISA level and optional features of the
+// processor.
+//
+// For ppc64(le), it is safe to check only for ISA level starting on ISA v3.00,
+// since there are no optional categories. There are some exceptions that also
+// require kernel support to work (darn, scv), so there are feature bits for
+// those as well. The minimum processor requirement is POWER8 (ISA 2.07).
+// The struct is padded to avoid false sharing.
+var PPC64 struct {
+	_        CacheLinePad
+	HasDARN  bool // Hardware random number generator (requires kernel enablement)
+	HasSCV   bool // Syscall vectored (requires kernel enablement)
+	IsPOWER8 bool // ISA v2.07 (POWER8)
+	IsPOWER9 bool // ISA v3.00 (POWER9)
+	_        CacheLinePad
+}
+
+// The booleans in S390X contain the correspondingly named facility bit.
+// Fields marked [mandatory] below are documented as always present.
+// The struct is padded to avoid false sharing.
+var S390X struct {
+	_         CacheLinePad
+	HasZARCH  bool // z architecture mode is active [mandatory]
+	HasSTFLE  bool // store facility list extended [mandatory]
+	HasLDISP  bool // long (20-bit) displacements [mandatory]
+	HasEIMM   bool // 32-bit immediates [mandatory]
+	HasDFP    bool // decimal floating point
+	HasETF3EH bool // ETF-3 enhanced
+	HasMSA    bool // message security assist (CPACF)
+	HasAES    bool // KM-AES{128,192,256} functions
+	HasAESCBC bool // KMC-AES{128,192,256} functions
+	HasAESCTR bool // KMCTR-AES{128,192,256} functions
+	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
+	HasGHASH  bool // KIMD-GHASH function
+	HasSHA1   bool // K{I,L}MD-SHA-1 functions
+	HasSHA256 bool // K{I,L}MD-SHA-256 functions
+	HasSHA512 bool // K{I,L}MD-SHA-512 functions
+	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
+	HasVX     bool // vector facility. Note: the runtime sets this when it processes auxv records.
+	HasVXE    bool // vector-enhancements facility 1
+	HasKDSA   bool // elliptic curve functions
+	HasECDSA  bool // NIST curves
+	HasEDDSA  bool // Edwards curves
+	_         CacheLinePad
+}
+
+// Initialize examines the processor and sets the relevant variables above.
+// This is called by the runtime package early in program initialization,
+// before normal init functions are run. env is set by runtime if the OS supports
+// cpu feature options in GODEBUG.
+func Initialize(env string) {
+	// Detection runs first: doinit fills in the options list that
+	// processOptions consults when applying the GODEBUG overrides.
+	doinit()
+	processOptions(env)
+}
+
+// options contains the cpu debug options that can be used in GODEBUG.
+// Options are arch dependent and are added by the arch specific doinit functions.
+// Features that are mandatory for the specific GOARCH should not be added to options
+// (e.g. SSE2 on amd64).
+var options []option
+
+// option describes one cpu feature that can be switched through GODEBUG.
+// Feature points at the boolean that processOptions updates; Specified and
+// Enable record what was requested while the GODEBUG string is parsed.
+//
+// Option names should be lower case. e.g. avx instead of AVX.
+type option struct {
+	Name      string
+	Feature   *bool
+	Specified bool // whether feature value was specified in GODEBUG
+	Enable    bool // whether feature should be enabled
+	Required  bool // whether feature is mandatory and can not be disabled
+}
+
+// processOptions enables or disables CPU feature values based on the parsed env string.
+// The env string is expected to be of the form cpu.feature1=value1,cpu.feature2=value2...
+// where feature names is one of the architecture specific list stored in the
+// cpu packages options variable and values are either 'on' or 'off'.
+// If env contains cpu.all=off then all cpu features referenced through the options
+// variable are disabled. Other feature names and values result in warning messages.
+func processOptions(env string) {
+field:
+	for env != "" {
+		field := ""
+		i := indexByte(env, ',')
+		if i < 0 {
+			field, env = env, ""
+		} else {
+			field, env = env[:i], env[i+1:]
+		}
+		if len(field) < 4 || field[:4] != "cpu." {
+			continue
+		}
+		i = indexByte(field, '=')
+		if i < 0 {
+			print("GODEBUG: no value specified for \"", field, "\"\n")
+			continue
+		}
+		key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
+
+		var enable bool
+		switch value {
+		case "on":
+			enable = true
+		case "off":
+			enable = false
+		default:
+			print("GODEBUG: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
+			continue field
+		}
+
+		if key == "all" {
+			for i := range options {
+				options[i].Specified = true
+				options[i].Enable = enable || options[i].Required
+			}
+			continue field
+		}
+
+		for i := range options {
+			if options[i].Name == key {
+				options[i].Specified = true
+				options[i].Enable = enable
+				continue field
+			}
+		}
+
+		print("GODEBUG: unknown cpu feature \"", key, "\"\n")
+	}
+
+	for _, o := range options {
+		if !o.Specified {
+			continue
+		}
+
+		if o.Enable && !*o.Feature {
+			print("GODEBUG: can not enable \"", o.Name, "\", missing CPU support\n")
+			continue
+		}
+
+		if !o.Enable && o.Required {
+			print("GODEBUG: can not disable \"", o.Name, "\", required CPU feature\n")
+			continue
+		}
+
+		*o.Feature = o.Enable
+	}
+}
+
+// indexByte reports the position of the first byte in s that equals c.
+// It returns -1 when s contains no such byte.
+func indexByte(s string, c byte) int {
+	pos := 0
+	for pos < len(s) && s[pos] != c {
+		pos++
+	}
+	if pos == len(s) {
+		return -1
+	}
+	return pos
+}
diff --git a/src/internal/cpu/cpu.s b/src/internal/cpu/cpu.s
new file mode 100644
index 0000000..3c770c1
--- /dev/null
+++ b/src/internal/cpu/cpu.s
@@ -0,0 +1,6 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This assembly file exists to allow internal/cpu to call
+// non-exported runtime functions that use "go:linkname".
+\ No newline at end of file
diff --git a/src/internal/cpu/cpu_386.go b/src/internal/cpu/cpu_386.go
new file mode 100644
index 0000000..561c81f
--- /dev/null
+++ b/src/internal/cpu/cpu_386.go
@@ -0,0 +1,7 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+// GOARCH is the architecture name this file is built for.
+const GOARCH = "386"
diff --git a/src/internal/cpu/cpu_amd64.go b/src/internal/cpu/cpu_amd64.go
new file mode 100644
index 0000000..9b00153
--- /dev/null
+++ b/src/internal/cpu/cpu_amd64.go
@@ -0,0 +1,7 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+// GOARCH is the architecture name this file is built for.
+const GOARCH = "amd64"
diff --git a/src/internal/cpu/cpu_arm.go b/src/internal/cpu/cpu_arm.go
new file mode 100644
index 0000000..b624526
--- /dev/null
+++ b/src/internal/cpu/cpu_arm.go
@@ -0,0 +1,34 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+// CacheLinePadSize is the size in bytes of CacheLinePad on arm.
+const CacheLinePadSize = 32
+
+// arm doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2.
+// These are initialized by archauxv() and should not be changed after they are
+// initialized.
+var HWCap uint
+var HWCap2 uint
+
+// HWCAP/HWCAP2 bits. These are exposed by Linux and FreeBSD.
+// doinit tests them against HWCap to fill in the ARM feature flags.
+const (
+	hwcap_VFPv4 = 1 << 16
+	hwcap_IDIVA = 1 << 17
+)
+
+// doinit decodes the HWCAP bits into the ARM feature flags and registers
+// those flags as GODEBUG cpu options.
+func doinit() {
+	// HWCAP feature bits
+	ARM.HasIDIVA = isSet(HWCap, hwcap_IDIVA)
+	ARM.HasVFPv4 = isSet(HWCap, hwcap_VFPv4)
+
+	// The options only hold pointers to the flags, so they can be
+	// registered after the flags have been filled in.
+	options = []option{
+		{Name: "vfpv4", Feature: &ARM.HasVFPv4},
+		{Name: "idiva", Feature: &ARM.HasIDIVA},
+	}
+}
+
+// isSet reports whether hwc has at least one of the bits in value set.
+func isSet(hwc uint, value uint) bool {
+	masked := hwc & value
+	return masked > 0
+}
diff --git a/src/internal/cpu/cpu_arm64.go b/src/internal/cpu/cpu_arm64.go
new file mode 100644
index 0000000..f64d9e4
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64.go
@@ -0,0 +1,28 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+// CacheLinePadSize is the size in bytes of CacheLinePad on arm64.
+const CacheLinePadSize = 64
+
+// doinit registers the ARM64 feature flags as GODEBUG cpu options and then
+// delegates the actual feature detection to the OS specific osInit.
+func doinit() {
+	options = []option{
+		{Name: "aes", Feature: &ARM64.HasAES},
+		{Name: "pmull", Feature: &ARM64.HasPMULL},
+		{Name: "sha1", Feature: &ARM64.HasSHA1},
+		{Name: "sha2", Feature: &ARM64.HasSHA2},
+		{Name: "crc32", Feature: &ARM64.HasCRC32},
+		{Name: "atomics", Feature: &ARM64.HasATOMICS},
+		{Name: "cpuid", Feature: &ARM64.HasCPUID},
+		{Name: "isNeoverseN1", Feature: &ARM64.IsNeoverseN1},
+		{Name: "isZeus", Feature: &ARM64.IsZeus},
+	}
+
+	// arm64 uses different ways to detect CPU features at runtime depending on the operating system.
+	osInit()
+}
+
+// getisar0 returns the value of the ID_AA64ISAR0_EL1 system register.
+// It is implemented in assembly.
+func getisar0() uint64
+
+// getMIDR returns the value of the MIDR_EL1 system register.
+// It is implemented in assembly.
+func getMIDR() uint64
diff --git a/src/internal/cpu/cpu_arm64.s b/src/internal/cpu/cpu_arm64.s
new file mode 100644
index 0000000..d6e7f44
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64.s
@@ -0,0 +1,18 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func getisar0() uint64
+TEXT ·getisar0(SB),NOSPLIT,$0-8
+	// Read ID_AA64ISAR0_EL1 (Instruction Set Attributes 0) into R0.
+	MRS	ID_AA64ISAR0_EL1, R0
+	MOVD	R0, ret+0(FP) // the 8-byte uint64 result declared by $0-8
+	RET
+
+// func getMIDR() uint64
+TEXT ·getMIDR(SB), NOSPLIT, $0-8
+	MRS	MIDR_EL1, R0  // read the MIDR_EL1 system register into R0
+	MOVD	R0, ret+0(FP) // store it as the uint64 result
+	RET
diff --git a/src/internal/cpu/cpu_arm64_android.go b/src/internal/cpu/cpu_arm64_android.go
new file mode 100644
index 0000000..ac6eee5
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64_android.go
@@ -0,0 +1,12 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64
+// +build arm64
+
+package cpu
+
+func osInit() { // android: features come from HWCap; hwcapInit keeps HasATOMICS off for "android"
+	hwcapInit("android")
+}
diff --git a/src/internal/cpu/cpu_arm64_darwin.go b/src/internal/cpu/cpu_arm64_darwin.go
new file mode 100644
index 0000000..ce1b250
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64_darwin.go
@@ -0,0 +1,33 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64 && darwin && !ios
+// +build arm64,darwin,!ios
+
+package cpu
+
+func osInit() { // darwin/arm64: two features are queried via sysctl, four are assumed present
+	ARM64.HasATOMICS = sysctlEnabled([]byte("hw.optional.armv8_1_atomics\x00"))
+	ARM64.HasCRC32 = sysctlEnabled([]byte("hw.optional.armv8_crc32\x00"))
+
+	// Mac OS 11.0 has no hw.optional sysctl values for the features below, so
+	// their supported state cannot be detected dynamically. Assume the CPU
+	// features that Apple Silicon M1 supports to be available as a minimal set
+	// of features to all Go programs running on darwin/arm64.
+	ARM64.HasAES = true
+	ARM64.HasPMULL = true
+	ARM64.HasSHA1 = true
+	ARM64.HasSHA2 = true
+}
+
+//go:noescape
+func getsysctlbyname(name []byte) (int32, int32) // body is not in this file; sysctlEnabled reads the results as (ret, value)
+
+func sysctlEnabled(name []byte) bool { // reports whether the named sysctl was read and is positive
+	ret, value := getsysctlbyname(name)
+	if ret < 0 {
+		return false // a negative ret is treated as "feature not enabled"
+	}
+	return value > 0
+}
diff --git a/src/internal/cpu/cpu_arm64_freebsd.go b/src/internal/cpu/cpu_arm64_freebsd.go
new file mode 100644
index 0000000..8c48137
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64_freebsd.go
@@ -0,0 +1,46 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64
+// +build arm64
+
+package cpu
+
+func osInit() { // freebsd/arm64: derive the features from ID_AA64ISAR0_EL1
+	// Retrieve info from system register ID_AA64ISAR0_EL1.
+	isar0 := getisar0()
+
+	// ID_AA64ISAR0_EL1: each switch below decodes one 4-bit field of the register.
+	switch extractBits(isar0, 4, 7) { // field controlling AES and PMULL
+	case 1:
+		ARM64.HasAES = true
+	case 2:
+		ARM64.HasAES = true
+		ARM64.HasPMULL = true
+	}
+
+	switch extractBits(isar0, 8, 11) { // field controlling SHA1
+	case 1:
+		ARM64.HasSHA1 = true
+	}
+
+	switch extractBits(isar0, 12, 15) { // field controlling SHA2
+	case 1, 2:
+		ARM64.HasSHA2 = true
+	}
+
+	switch extractBits(isar0, 16, 19) { // field controlling CRC32
+	case 1:
+		ARM64.HasCRC32 = true
+	}
+
+	switch extractBits(isar0, 20, 23) { // field controlling atomics
+	case 2:
+		ARM64.HasATOMICS = true
+	}
+}
+
+func extractBits(data uint64, start, end uint) uint { // returns bits start..end (inclusive) of data, shifted down to bit 0
+	return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
+}
diff --git a/src/internal/cpu/cpu_arm64_hwcap.go b/src/internal/cpu/cpu_arm64_hwcap.go
new file mode 100644
index 0000000..8ac04fd
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64_hwcap.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64 && linux
+// +build arm64,linux
+
+package cpu
+
+// HWCap may be initialized by archauxv and
+// should not be changed after it was initialized.
+var HWCap uint
+
+// HWCAP bits. These are exposed by Linux; hwcapInit maps each one to a feature.
+const (
+	hwcap_AES     = 1 << 3  // ARM64.HasAES
+	hwcap_PMULL   = 1 << 4  // ARM64.HasPMULL
+	hwcap_SHA1    = 1 << 5  // ARM64.HasSHA1
+	hwcap_SHA2    = 1 << 6  // ARM64.HasSHA2
+	hwcap_CRC32   = 1 << 7  // ARM64.HasCRC32
+	hwcap_ATOMICS = 1 << 8  // ARM64.HasATOMICS (forced off on android)
+	hwcap_CPUID   = 1 << 11 // ARM64.HasCPUID; gates the getMIDR call
+)
+
+func hwcapInit(os string) { // os is "android" or "linux" in the callers visible in this package
+	// HWCap was populated by the runtime from the auxiliary vector.
+	// Use HWCap information since reading aarch64 system registers
+	// is not supported in user space on older linux kernels.
+	ARM64.HasAES = isSet(HWCap, hwcap_AES)
+	ARM64.HasPMULL = isSet(HWCap, hwcap_PMULL)
+	ARM64.HasSHA1 = isSet(HWCap, hwcap_SHA1)
+	ARM64.HasSHA2 = isSet(HWCap, hwcap_SHA2)
+	ARM64.HasCRC32 = isSet(HWCap, hwcap_CRC32)
+	ARM64.HasCPUID = isSet(HWCap, hwcap_CPUID)
+
+	// The Samsung S9+ kernel reports support for atomics, but not all cores
+	// actually support them, resulting in SIGILL. See issue #28431.
+	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
+	ARM64.HasATOMICS = isSet(HWCap, hwcap_ATOMICS) && os != "android"
+
+	// Identify NeoverseN1 and Zeus cores from MIDR, which is only read when
+	// the AUXV reports the CPUID bit. The getMIDR function executes an
+	// instruction which would normally be an illegal instruction, but it's
+	// trapped by the kernel, the value sanitized and then returned. Without
+	// the CPUID bit the kernel will not trap the instruction and the process
+	// will be terminated with SIGILL.
+	if ARM64.HasCPUID {
+		midr := getMIDR()
+		part_num := uint16((midr >> 4) & 0xfff)  // MIDR bits 4-15
+		implementor := byte((midr >> 24) & 0xff) // MIDR bits 24-31
+
+		if implementor == 'A' && part_num == 0xd0c {
+			ARM64.IsNeoverseN1 = true
+		}
+		if implementor == 'A' && part_num == 0xd40 {
+			ARM64.IsZeus = true
+		}
+	}
+}
+
+func isSet(hwc uint, value uint) bool { // reports whether hwc and value have a set bit in common
+	return hwc&value != 0
+}
diff --git a/src/internal/cpu/cpu_arm64_linux.go b/src/internal/cpu/cpu_arm64_linux.go
new file mode 100644
index 0000000..c3a3f9a
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64_linux.go
@@ -0,0 +1,12 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64 && linux && !android
+// +build arm64,linux,!android
+
+package cpu
+
+func osInit() { // linux (non-android): all features, including atomics, come from HWCap
+	hwcapInit("linux")
+}
diff --git a/src/internal/cpu/cpu_arm64_other.go b/src/internal/cpu/cpu_arm64_other.go
new file mode 100644
index 0000000..e8b5d52
--- /dev/null
+++ b/src/internal/cpu/cpu_arm64_other.go
@@ -0,0 +1,18 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64 && !linux && !freebsd && !android && (!darwin || ios)
+// +build arm64
+// +build !linux
+// +build !freebsd
+// +build !android
+// +build !darwin ios
+
+package cpu
+
+func osInit() {
+	// Intentionally empty. Other operating systems do not support reading HWCap
+	// from the auxiliary vector, reading privileged aarch64 system registers or
+	// sysctl in user space to detect CPU features at runtime.
+}
diff --git a/src/internal/cpu/cpu_mips.go b/src/internal/cpu/cpu_mips.go
new file mode 100644
index 0000000..14a9c97
--- /dev/null
+++ b/src/internal/cpu/cpu_mips.go
@@ -0,0 +1,10 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const CacheLinePadSize = 32
+
+func doinit() { // intentionally empty: this file registers no feature options
+}
diff --git a/src/internal/cpu/cpu_mips64x.go b/src/internal/cpu/cpu_mips64x.go
new file mode 100644
index 0000000..d2f9d44
--- /dev/null
+++ b/src/internal/cpu/cpu_mips64x.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+// +build mips64 mips64le
+
+package cpu
+
+const CacheLinePadSize = 32
+
+// HWCap is initialized by archauxv and should not be changed after it is
+// initialized.
+var HWCap uint
+
+// HWCAP bits. These are exposed by the Linux kernel 5.4.
+const (
+	// CPU features
+	hwcap_MIPS_MSA = 1 << 1 // tested against HWCap to set MIPS64X.HasMSA
+)
+
+func doinit() { // registers the mips64x feature option, then fills it in from HWCap
+	options = []option{
+		{Name: "msa", Feature: &MIPS64X.HasMSA},
+	}
+
+	// HWCAP feature bits: the feature is reported when its bit is set in HWCap.
+	MIPS64X.HasMSA = isSet(HWCap, hwcap_MIPS_MSA)
+}
+
+func isSet(hwc uint, value uint) bool { // reports whether hwc and value have a set bit in common
+	return hwc&value != 0
+}
diff --git a/src/internal/cpu/cpu_mipsle.go b/src/internal/cpu/cpu_mipsle.go
new file mode 100644
index 0000000..14a9c97
--- /dev/null
+++ b/src/internal/cpu/cpu_mipsle.go
@@ -0,0 +1,10 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const CacheLinePadSize = 32
+
+func doinit() { // intentionally empty: this file registers no feature options
+}
diff --git a/src/internal/cpu/cpu_no_name.go b/src/internal/cpu/cpu_no_name.go
new file mode 100644
index 0000000..8d563b5
--- /dev/null
+++ b/src/internal/cpu/cpu_no_name.go
@@ -0,0 +1,19 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64
+// +build !386,!amd64
+
+package cpu
+
+// Name returns the CPU name given by the vendor
+// if it can be read directly from memory or by CPU instructions.
+// If the CPU name cannot be determined an empty string is returned.
+//
+// Implementations that use the Operating System (e.g. sysctl or /sys/)
+// to gather CPU information for display should be placed in internal/sysinfo.
+func Name() string {
+	// This build (everything except 386 and amd64) has no name source: always "".
+	return ""
+}
diff --git a/src/internal/cpu/cpu_ppc64x.go b/src/internal/cpu/cpu_ppc64x.go
new file mode 100644
index 0000000..2e7fd3e
--- /dev/null
+++ b/src/internal/cpu/cpu_ppc64x.go
@@ -0,0 +1,24 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+package cpu
+
+const CacheLinePadSize = 128
+
+func doinit() { // registers the ppc64 feature options; the values are filled in by osinit
+	options = []option{
+		{Name: "darn", Feature: &PPC64.HasDARN},
+		{Name: "scv", Feature: &PPC64.HasSCV},
+		{Name: "power9", Feature: &PPC64.IsPOWER9},
+	}
+
+	osinit() // OS-specific detection (see the aix and linux variants)
+}
+
+func isSet(hwc uint, value uint) bool { // reports whether hwc and value have a set bit in common
+	return hwc&value != 0
+}
diff --git a/src/internal/cpu/cpu_ppc64x_aix.go b/src/internal/cpu/cpu_ppc64x_aix.go
new file mode 100644
index 0000000..3d17a9c
--- /dev/null
+++ b/src/internal/cpu/cpu_ppc64x_aix.go
@@ -0,0 +1,22 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+package cpu
+
+const (
+	// getsystemcfg constants
+	_SC_IMPL     = 2       // label passed to getsystemcfg by osinit
+	_IMPL_POWER9 = 0x20000 // bit tested in the _SC_IMPL result to set PPC64.IsPOWER9
+)
+
+func osinit() { // aix: only IsPOWER9 is detected here; HasDARN and HasSCV are left untouched
+	impl := getsystemcfg(_SC_IMPL)
+	PPC64.IsPOWER9 = isSet(impl, _IMPL_POWER9)
+}
+
+// getsystemcfg is defined in runtime/os2_aix.go
+func getsystemcfg(label uint) uint
diff --git a/src/internal/cpu/cpu_ppc64x_linux.go b/src/internal/cpu/cpu_ppc64x_linux.go
new file mode 100644
index 0000000..b7c7345
--- /dev/null
+++ b/src/internal/cpu/cpu_ppc64x_linux.go
@@ -0,0 +1,30 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+package cpu
+
+// ppc64 doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2.
+// These are initialized by archauxv and should not be changed after they are
+// initialized.
+var HWCap uint
+var HWCap2 uint
+
+// HWCAP2 bits. These are exposed by Linux and are tested against HWCap2 in osinit.
+const (
+	// ISA Level
+	hwcap2_ARCH_3_00 = 0x00800000
+
+	// CPU features
+	hwcap2_DARN = 0x00200000
+	hwcap2_SCV  = 0x00100000
+)
+
+func osinit() { // linux: every ppc64 feature is derived from a bit of HWCap2
+	PPC64.IsPOWER9 = isSet(HWCap2, hwcap2_ARCH_3_00)
+	PPC64.HasDARN = isSet(HWCap2, hwcap2_DARN)
+	PPC64.HasSCV = isSet(HWCap2, hwcap2_SCV)
+}
diff --git a/src/internal/cpu/cpu_riscv64.go b/src/internal/cpu/cpu_riscv64.go
new file mode 100644
index 0000000..54b8c33
--- /dev/null
+++ b/src/internal/cpu/cpu_riscv64.go
@@ -0,0 +1,10 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const CacheLinePadSize = 32
+
+func doinit() { // intentionally empty: this file registers no feature options
+}
diff --git a/src/internal/cpu/cpu_s390x.go b/src/internal/cpu/cpu_s390x.go
new file mode 100644
index 0000000..45d8ed2
--- /dev/null
+++ b/src/internal/cpu/cpu_s390x.go
@@ -0,0 +1,205 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const CacheLinePadSize = 256
+
+var HWCap uint // read by doinit to detect the vector facility (hwcap_VX)
+
+// bitIsSet reports whether the bit at index is set. The bit index
+// is in big endian order, so bit index 0 is the leftmost bit of bits[0].
+func bitIsSet(bits []uint64, index uint) bool {
+	return bits[index/64]&((1<<63)>>(index%64)) != 0 // indexing panics if index >= 64*len(bits)
+}
+
+// function is the function code for the named function.
+type function uint8
+
+const (
+	// KM{,A,C,CTR} function codes
+	aes128 function = 18 // AES-128
+	aes192 function = 19 // AES-192
+	aes256 function = 20 // AES-256
+
+	// K{I,L}MD function codes
+	sha1     function = 1  // SHA-1
+	sha256   function = 2  // SHA-256
+	sha512   function = 3  // SHA-512
+	sha3_224 function = 32 // SHA3-224
+	sha3_256 function = 33 // SHA3-256
+	sha3_384 function = 34 // SHA3-384
+	sha3_512 function = 35 // SHA3-512
+	shake128 function = 36 // SHAKE-128
+	shake256 function = 37 // SHAKE-256
+
+	// KIMD function codes (doinit checks ghash against KIMD only)
+	ghash function = 65 // GHASH
+)
+
+const (
+	// KDSA function codes, tested by doinit against the kdsaQuery result
+	ecdsaVerifyP256    function = 1  // NIST P256
+	ecdsaVerifyP384    function = 2  // NIST P384
+	ecdsaVerifyP521    function = 3  // NIST P521
+	ecdsaSignP256      function = 9  // NIST P256
+	ecdsaSignP384      function = 10 // NIST P384
+	ecdsaSignP521      function = 11 // NIST P521
+	eddsaVerifyEd25519 function = 32 // Curve25519
+	eddsaVerifyEd448   function = 36 // Curve448
+	eddsaSignEd25519   function = 40 // Curve25519
+	eddsaSignEd448     function = 44 // Curve448
+)
+
+// queryResult contains the result of a Query function
+// call. Bits are numbered in big endian order so the
+// leftmost bit (the MSB) is at index 0.
+type queryResult struct {
+	bits [2]uint64 // 128 bits, indexed by function code
+}
+
+// Has reports whether all of the given functions are present; it panics if none are given.
+func (q *queryResult) Has(fns ...function) bool {
+	if len(fns) == 0 {
+		panic("no function codes provided")
+	}
+	for _, f := range fns {
+		if !bitIsSet(q.bits[:], uint(f)) {
+			return false // one missing function is enough to fail
+		}
+	}
+	return true
+}
+
+// facility is a bit index for the named facility.
+type facility uint8
+
+const (
+	// mandatory facilities
+	zarch  facility = 1  // z architecture mode is active
+	stflef facility = 7  // store-facility-list-extended
+	ldisp  facility = 18 // long-displacement
+	eimm   facility = 21 // extended-immediate
+
+	// miscellaneous facilities
+	dfp    facility = 42 // decimal-floating-point
+	etf3eh facility = 30 // extended-translation 3 enhancement
+
+	// cryptography facilities
+	msa  facility = 17  // message-security-assist
+	msa3 facility = 76  // message-security-assist extension 3
+	msa4 facility = 77  // message-security-assist extension 4
+	msa5 facility = 57  // message-security-assist extension 5
+	msa8 facility = 146 // message-security-assist extension 8
+	msa9 facility = 155 // message-security-assist extension 9
+
+	// vector facilities
+	vxe facility = 135 // vector-enhancements 1
+
+	// Note: vx requires kernel support
+	// and so must be fetched from HWCAP.
+
+	hwcap_VX = 1 << 11 // vector facility: a bit mask for HWCap, not a facility index
+)
+
+// facilityList contains the result of an STFLE call.
+// Bits are numbered in big endian order so the
+// leftmost bit (the MSB) is at index 0.
+type facilityList struct {
+	bits [4]uint64 // 256 bits, indexed by facility number
+}
+
+// Has reports whether all of the given facilities are present; it panics if none are given.
+func (s *facilityList) Has(fs ...facility) bool {
+	if len(fs) == 0 {
+		panic("no facility bits provided")
+	}
+	for _, f := range fs {
+		if !bitIsSet(s.bits[:], uint(f)) {
+			return false // one missing facility is enough to fail
+		}
+	}
+	return true
+}
+
+// The following feature detection functions are defined in cpu_s390x.s.
+// They are likely to be expensive to call so the results should be cached.
+func stfle() facilityList
+func kmQuery() queryResult    // KM query
+func kmcQuery() queryResult   // KMC query
+func kmctrQuery() queryResult // KMCTR query; doinit calls it only with msa4
+func kmaQuery() queryResult   // KMA query; doinit calls it only with msa8
+func kimdQuery() queryResult  // KIMD query
+func klmdQuery() queryResult  // KLMD query
+func kdsaQuery() queryResult  // KDSA query; doinit calls it only with msa9
+
+func doinit() { // registers the s390x feature options and fills in S390X from STFLE, the query functions and HWCap
+	options = []option{
+		{Name: "zarch", Feature: &S390X.HasZARCH},
+		{Name: "stfle", Feature: &S390X.HasSTFLE},
+		{Name: "ldisp", Feature: &S390X.HasLDISP},
+		{Name: "msa", Feature: &S390X.HasMSA},
+		{Name: "eimm", Feature: &S390X.HasEIMM},
+		{Name: "dfp", Feature: &S390X.HasDFP},
+		{Name: "etf3eh", Feature: &S390X.HasETF3EH},
+		{Name: "vx", Feature: &S390X.HasVX},
+		{Name: "vxe", Feature: &S390X.HasVXE},
+		{Name: "kdsa", Feature: &S390X.HasKDSA},
+	}
+
+	aes := []function{aes128, aes192, aes256} // an AES mode counts only if all three key sizes are present
+	facilities := stfle()
+
+	S390X.HasZARCH = facilities.Has(zarch)
+	S390X.HasSTFLE = facilities.Has(stflef)
+	S390X.HasLDISP = facilities.Has(ldisp)
+	S390X.HasEIMM = facilities.Has(eimm)
+	S390X.HasDFP = facilities.Has(dfp)
+	S390X.HasETF3EH = facilities.Has(etf3eh)
+	S390X.HasMSA = facilities.Has(msa)
+
+	if S390X.HasMSA { // the query functions are only called when the msa facility is present
+		// cipher message
+		km, kmc := kmQuery(), kmcQuery()
+		S390X.HasAES = km.Has(aes...)
+		S390X.HasAESCBC = kmc.Has(aes...)
+		if facilities.Has(msa4) {
+			kmctr := kmctrQuery()
+			S390X.HasAESCTR = kmctr.Has(aes...)
+		}
+		if facilities.Has(msa8) {
+			kma := kmaQuery()
+			S390X.HasAESGCM = kma.Has(aes...)
+		}
+
+		// compute message digest
+		kimd := kimdQuery() // intermediate (no padding)
+		klmd := klmdQuery() // last (padding)
+		S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
+		S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
+		S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
+		S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
+		sha3 := []function{
+			sha3_224, sha3_256, sha3_384, sha3_512,
+			shake128, shake256,
+		}
+		S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...)
+		S390X.HasKDSA = facilities.Has(msa9) // elliptic curves
+		if S390X.HasKDSA {
+			kdsa := kdsaQuery()
+			S390X.HasECDSA = kdsa.Has(ecdsaVerifyP256, ecdsaSignP256, ecdsaVerifyP384, ecdsaSignP384, ecdsaVerifyP521, ecdsaSignP521)
+			S390X.HasEDDSA = kdsa.Has(eddsaVerifyEd25519, eddsaSignEd25519, eddsaVerifyEd448, eddsaSignEd448)
+		}
+	}
+
+	S390X.HasVX = isSet(HWCap, hwcap_VX) // vx comes from HWCap, not from the facility list
+
+	if S390X.HasVX { // vxe is only reported together with vx
+		S390X.HasVXE = facilities.Has(vxe)
+	}
+}
+
+func isSet(hwc uint, value uint) bool { // reports whether hwc and value have a set bit in common
+	return hwc&value != 0
+}
diff --git a/src/internal/cpu/cpu_s390x.s b/src/internal/cpu/cpu_s390x.s
new file mode 100644
index 0000000..a1243aa
--- /dev/null
+++ b/src/internal/cpu/cpu_s390x.s
@@ -0,0 +1,63 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func stfle() facilityList
+TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32
+	MOVD $ret+0(FP), R1  // address of 32-byte return value
+	MOVD $3, R0          // last doubleword index to store
+	XC   $32, (R1), (R1) // clear 4 doublewords (32 bytes)
+	WORD $0xb2b01000     // store facility list extended (STFLE), hand-encoded
+	RET
+
+// func kmQuery() queryResult
+TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KM-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92E0024    // cipher message (KM), hand-encoded
+	RET
+
+// func kmcQuery() queryResult
+TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMC-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92F0024    // cipher message with chaining (KMC), hand-encoded
+	RET
+
+// func kmctrQuery() queryResult
+TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMCTR-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92D4024    // cipher message with counter (KMCTR), hand-encoded
+	RET
+
+// func kmaQuery() queryResult
+TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMA-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xb9296024    // cipher message with authentication (KMA), hand-encoded
+	RET
+
+// func kimdQuery() queryResult
+TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KIMD-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93E0024    // compute intermediate message digest (KIMD), hand-encoded
+	RET
+
+// func klmdQuery() queryResult
+TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KLMD-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93F0024    // compute last message digest (KLMD), hand-encoded
+	RET
+
+// func kdsaQuery() queryResult
+TEXT ·kdsaQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KDSA-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93A0008    // compute digital signature authentication (KDSA), hand-encoded
+	RET
+
diff --git a/src/internal/cpu/cpu_s390x_test.go b/src/internal/cpu/cpu_s390x_test.go
new file mode 100644
index 0000000..ad86858
--- /dev/null
+++ b/src/internal/cpu/cpu_s390x_test.go
@@ -0,0 +1,63 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu_test
+
+import (
+	"errors"
+	. "internal/cpu"
+	"os"
+	"regexp"
+	"testing"
+)
+
+func getFeatureList() ([]string, error) { // returns the whitespace-separated words of the first "features :" line
+	cpuinfo, err := os.ReadFile("/proc/cpuinfo")
+	if err != nil {
+		return nil, err
+	}
+	r := regexp.MustCompile("features\\s*:\\s*(.*)") // captures the rest of the line after the colon
+	b := r.FindSubmatch(cpuinfo)
+	if len(b) < 2 {
+		return nil, errors.New("no feature list in /proc/cpuinfo")
+	}
+	return regexp.MustCompile("\\s+").Split(string(b[1]), -1), nil
+}
+
+func TestS390XAgainstCPUInfo(t *testing.T) { // cross-checks the detected features against /proc/cpuinfo
+	// mapping of linux feature strings to S390X fields
+	mapping := make(map[string]*bool)
+	for _, option := range Options {
+		mapping[option.Name] = option.Feature
+	}
+
+	// these must be true on the machines Go supports;
+	// each entry flips to true once it is seen in the cpuinfo feature list
+	mandatory := make(map[string]bool)
+	mandatory["zarch"] = false
+	mandatory["eimm"] = false
+	mandatory["ldisp"] = false
+	mandatory["stfle"] = false
+
+	features, err := getFeatureList()
+	if err != nil {
+		t.Error(err) // not fatal: with no features, every mandatory entry is also reported below
+	}
+	for _, feature := range features {
+		if _, ok := mandatory[feature]; ok {
+			mandatory[feature] = true
+		}
+		if flag, ok := mapping[feature]; ok {
+			if !*flag {
+				t.Errorf("feature '%v' not detected", feature)
+			}
+		} else {
+			t.Logf("no entry for '%v'", feature) // the kernel lists a feature this package has no option for
+		}
+	}
+	for k, v := range mandatory {
+		if !v {
+			t.Errorf("mandatory feature '%v' not detected", k)
+		}
+	}
+}
diff --git a/src/internal/cpu/cpu_test.go b/src/internal/cpu/cpu_test.go
new file mode 100644
index 0000000..2de7365
--- /dev/null
+++ b/src/internal/cpu/cpu_test.go
@@ -0,0 +1,83 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu_test
+
+import (
+	. "internal/cpu"
+	"internal/testenv"
+	"os"
+	"os/exec"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+func TestMinimalFeatures(t *testing.T) { // every option marked Required must be detected
+	// TODO: maybe do MustSupportFeatureDectection(t) ?
+	if runtime.GOARCH == "arm64" {
+		switch runtime.GOOS {
+		case "linux", "android", "darwin": // the arm64 systems this test runs on
+		default:
+			t.Skipf("%s/%s is not supported", runtime.GOOS, runtime.GOARCH)
+		}
+	}
+
+	for _, o := range Options {
+		if o.Required && !*o.Feature {
+			t.Errorf("%v expected true, got false", o.Name)
+		}
+	}
+}
+
+func MustHaveDebugOptionsSupport(t *testing.T) { // skips the calling test when DebugOptions is false
+	if !DebugOptions {
+		t.Skipf("skipping test: cpu feature options not supported by OS")
+	}
+}
+
+func MustSupportFeatureDectection(t *testing.T) { // currently a no-op placeholder: it never skips
+	// TODO: add platforms that do not have CPU feature detection support.
+}
+
+// runDebugOptionsTest re-runs this test binary for test with only GODEBUG=options set; its output must end in PASS.
+func runDebugOptionsTest(t *testing.T, test string, options string) {
+	MustHaveDebugOptionsSupport(t)
+	testenv.MustHaveExec(t)
+
+	env := "GODEBUG=" + options
+	cmd := exec.Command(os.Args[0], "-test.run="+test)
+	cmd.Env = append(cmd.Env, env) // cmd.Env starts empty, so the child sees only GODEBUG
+
+	output, err := cmd.CombinedOutput()
+	// The last word of the output is the verdict; no output at all leaves got empty instead of panicking.
+	got := ""
+	if fields := strings.Fields(string(output)); len(fields) > 0 {
+		got = fields[len(fields)-1]
+	}
+	want := "PASS"
+	if err != nil || got != want {
+		t.Fatalf("%s with %s: want %s, got %v", test, env, want, got)
+	}
+}
+
+func TestDisableAllCapabilities(t *testing.T) { // parent half: re-runs TestAllCapabilitiesDisabled under cpu.all=off
+	MustSupportFeatureDectection(t)
+	runDebugOptionsTest(t, "TestAllCapabilitiesDisabled", "cpu.all=off")
+}
+
+func TestAllCapabilitiesDisabled(t *testing.T) { // child half: only does work when GODEBUG is exactly cpu.all=off
+	MustHaveDebugOptionsSupport(t)
+
+	if os.Getenv("GODEBUG") != "cpu.all=off" {
+		t.Skipf("skipping test: GODEBUG=cpu.all=off not set")
+	}
+
+	for _, o := range Options {
+		want := o.Required // with everything switched off, only required features may remain true
+		if got := *o.Feature; got != want {
+			t.Errorf("%v: expected %v, got %v", o.Name, want, got)
+		}
+	}
+}
diff --git a/src/internal/cpu/cpu_wasm.go b/src/internal/cpu/cpu_wasm.go
new file mode 100644
index 0000000..2310ad6
--- /dev/null
+++ b/src/internal/cpu/cpu_wasm.go
@@ -0,0 +1,10 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const CacheLinePadSize = 64
+
+func doinit() { // intentionally empty: this file registers no feature options
+}
diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go
new file mode 100644
index 0000000..fd1217a
--- /dev/null
+++ b/src/internal/cpu/cpu_x86.go
@@ -0,0 +1,164 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64
+// +build 386 amd64
+
+package cpu
+
+const CacheLinePadSize = 64
+
+// cpuid is implemented in cpu_x86.s. It executes CPUID with eaxArg in AX and ecxArg in CX.
+func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+
+// xgetbv with ecx = 0 is implemented in cpu_x86.s. It returns the AX and DX results of XGETBV.
+func xgetbv() (eax, edx uint32)
+
+const (
+	// edx bits of cpuid(1, 0)
+	cpuid_SSE2 = 1 << 26
+
+	// ecx bits of cpuid(1, 0)
+	cpuid_SSE3      = 1 << 0
+	cpuid_PCLMULQDQ = 1 << 1
+	cpuid_SSSE3     = 1 << 9
+	cpuid_FMA       = 1 << 12
+	cpuid_SSE41     = 1 << 19
+	cpuid_SSE42     = 1 << 20
+	cpuid_POPCNT    = 1 << 23
+	cpuid_AES       = 1 << 25
+	cpuid_OSXSAVE   = 1 << 27
+	cpuid_AVX       = 1 << 28
+
+	// ebx bits of cpuid(7, 0)
+	cpuid_BMI1 = 1 << 3
+	cpuid_AVX2 = 1 << 5
+	cpuid_BMI2 = 1 << 8
+	cpuid_ERMS = 1 << 9
+	cpuid_ADX  = 1 << 19
+)
+
+var maxExtendedFunctionInformation uint32 // eax of cpuid(0x80000000, 0); set by doinit, read by Name
+
+func doinit() { // registers the x86 feature options and fills in X86 from CPUID and XGETBV
+	options = []option{
+		{Name: "adx", Feature: &X86.HasADX},
+		{Name: "aes", Feature: &X86.HasAES},
+		{Name: "avx", Feature: &X86.HasAVX},
+		{Name: "avx2", Feature: &X86.HasAVX2},
+		{Name: "bmi1", Feature: &X86.HasBMI1},
+		{Name: "bmi2", Feature: &X86.HasBMI2},
+		{Name: "erms", Feature: &X86.HasERMS},
+		{Name: "fma", Feature: &X86.HasFMA},
+		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
+		{Name: "popcnt", Feature: &X86.HasPOPCNT},
+		{Name: "sse3", Feature: &X86.HasSSE3},
+		{Name: "sse41", Feature: &X86.HasSSE41},
+		{Name: "sse42", Feature: &X86.HasSSE42},
+		{Name: "ssse3", Feature: &X86.HasSSSE3},
+
+		// These capabilities should always be enabled on amd64:
+		{Name: "sse2", Feature: &X86.HasSSE2, Required: GOARCH == "amd64"},
+	}
+
+	maxID, _, _, _ := cpuid(0, 0) // eax of leaf 0 bounds which leaves are queried below
+
+	if maxID < 1 {
+		return // leaf 1 unavailable: every feature stays false
+	}
+
+	maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
+
+	_, _, ecx1, edx1 := cpuid(1, 0)
+	X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
+
+	X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
+	X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
+	X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
+	X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
+	X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
+	X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
+	X86.HasAES = isSet(ecx1, cpuid_AES)
+
+	// OSXSAVE can be false when using older Operating Systems
+	// or when explicitly disabled on newer Operating Systems by
+	// e.g. setting the xsavedisable boot option on Windows 10.
+	X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
+
+	// The FMA instruction set extension only has VEX prefixed instructions.
+	// VEX prefixed instructions require OSXSAVE to be enabled.
+	// See Intel 64 and IA-32 Architecture Software Developer’s Manual Volume 2
+	// Section 2.4 "AVX and SSE Instruction Exception Specification"
+	X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasOSXSAVE
+
+	osSupportsAVX := false
+	// For XGETBV, OSXSAVE bit is required and sufficient.
+	if X86.HasOSXSAVE {
+		eax, _ := xgetbv()
+		// Check if XMM and YMM registers have OS support.
+		osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
+	}
+
+	X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
+
+	if maxID < 7 {
+		return // leaf 7 unavailable: the features below stay false
+	}
+
+	_, ebx7, _, _ := cpuid(7, 0)
+	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
+	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
+	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
+	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
+	X86.HasADX = isSet(ebx7, cpuid_ADX)
+}
+
+func isSet(hwc uint32, value uint32) bool { // reports whether hwc and value have a set bit in common
+	return hwc&value != 0
+}
+
+// Name returns the CPU name given by the vendor, read via cpuid
+// leaves 0x80000002-0x80000004. If the CPU name cannot be determined
+// (those leaves are not available) an empty string is returned.
+func Name() string {
+	if maxExtendedFunctionInformation < 0x80000004 {
+		return ""
+	}
+
+	data := make([]byte, 0, 3*4*4) // 3 cpuid calls x 4 registers x 4 bytes
+
+	var eax, ebx, ecx, edx uint32
+	eax, ebx, ecx, edx = cpuid(0x80000002, 0)
+	data = appendBytes(data, eax, ebx, ecx, edx)
+	eax, ebx, ecx, edx = cpuid(0x80000003, 0)
+	data = appendBytes(data, eax, ebx, ecx, edx)
+	eax, ebx, ecx, edx = cpuid(0x80000004, 0)
+	data = appendBytes(data, eax, ebx, ecx, edx)
+
+	// Trim leading spaces.
+	for len(data) > 0 && data[0] == ' ' {
+		data = data[1:]
+	}
+
+	// Trim tail after and including the first null byte.
+	for i, c := range data {
+		if c == '\x00' {
+			data = data[:i]
+			break
+		}
+	}
+
+	return string(data)
+}
+
+func appendBytes(b []byte, args ...uint32) []byte { // appends each arg to b as 4 bytes, least significant byte first
+	for _, arg := range args {
+		b = append(b,
+			byte((arg >> 0)),
+			byte((arg >> 8)),
+			byte((arg >> 16)),
+			byte((arg >> 24)))
+	}
+	return b
+}
diff --git a/src/internal/cpu/cpu_x86.s b/src/internal/cpu/cpu_x86.s
new file mode 100644
index 0000000..0df5da1
--- /dev/null
+++ b/src/internal/cpu/cpu_x86.s
@@ -0,0 +1,27 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64
+// +build 386 amd64
+
+#include "textflag.h"
+
+// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·cpuid(SB), NOSPLIT, $0-24
+	MOVL eaxArg+0(FP), AX // load both arguments
+	MOVL ecxArg+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)    // store the four result registers
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func xgetbv() (eax, edx uint32)
+TEXT ·xgetbv(SB),NOSPLIT,$0-8
+	MOVL $0, CX        // ecx = 0, as noted on the Go prototype
+	XGETBV
+	MOVL AX, eax+0(FP) // store the two result registers
+	MOVL DX, edx+4(FP)
+	RET
diff --git a/src/internal/cpu/cpu_x86_test.go b/src/internal/cpu/cpu_x86_test.go
new file mode 100644
index 0000000..e3e16cc
--- /dev/null
+++ b/src/internal/cpu/cpu_x86_test.go
@@ -0,0 +1,55 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64
+// +build 386 amd64
+
+package cpu_test
+
+import (
+	. "internal/cpu"
+	"os"
+	"runtime"
+	"testing"
+)
+
+func TestX86ifAVX2hasAVX(t *testing.T) { // HasAVX2 must never be reported without HasAVX
+	if X86.HasAVX2 && !X86.HasAVX {
+		t.Fatalf("HasAVX expected true when HasAVX2 is true, got false")
+	}
+}
+
+func TestDisableSSE2(t *testing.T) { // parent half: re-runs TestSSE2DebugOption under cpu.sse2=off
+	runDebugOptionsTest(t, "TestSSE2DebugOption", "cpu.sse2=off")
+}
+
+func TestSSE2DebugOption(t *testing.T) { // child half: only does work when GODEBUG is exactly cpu.sse2=off
+	MustHaveDebugOptionsSupport(t)
+
+	if os.Getenv("GODEBUG") != "cpu.sse2=off" {
+		t.Skipf("skipping test: GODEBUG=cpu.sse2=off not set")
+	}
+
+	want := runtime.GOARCH != "386" // SSE2 can only be disabled on 386.
+	if got := X86.HasSSE2; got != want {
+		t.Errorf("X86.HasSSE2 on %s expected %v, got %v", runtime.GOARCH, want, got)
+	}
+}
+
+func TestDisableSSE3(t *testing.T) { // parent half: re-runs TestSSE3DebugOption under cpu.sse3=off
+	runDebugOptionsTest(t, "TestSSE3DebugOption", "cpu.sse3=off")
+}
+
+func TestSSE3DebugOption(t *testing.T) { // child half: only does work when GODEBUG is exactly cpu.sse3=off
+	MustHaveDebugOptionsSupport(t)
+
+	if os.Getenv("GODEBUG") != "cpu.sse3=off" {
+		t.Skipf("skipping test: GODEBUG=cpu.sse3=off not set")
+	}
+
+	want := false // sse3 is not a required option, so switching it off must take effect
+	if got := X86.HasSSE3; got != want {
+		t.Errorf("X86.HasSSE3 expected %v, got %v", want, got)
+	}
+}
diff --git a/src/internal/cpu/export_test.go b/src/internal/cpu/export_test.go
new file mode 100644
index 0000000..91bfc1b
--- /dev/null
+++ b/src/internal/cpu/export_test.go
@@ -0,0 +1,9 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+var (
+	Options = options // exported name for the unexported options (defined elsewhere in package cpu), for the external cpu_test package
+)
diff --git a/src/internal/execabs/execabs.go b/src/internal/execabs/execabs.go
new file mode 100644
index 0000000..9a05d97
--- /dev/null
+++ b/src/internal/execabs/execabs.go
@@ -0,0 +1,70 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package execabs is a drop-in replacement for os/exec
+// that requires PATH lookups to find absolute paths.
+// That is, execabs.Command("cmd") runs the same PATH lookup
+// as exec.Command("cmd"), but if the result is a path
+// which is relative, the Run and Start methods will report
+// an error instead of running the executable.
+package execabs
+
+import (
+	"context"
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"reflect"
+	"unsafe"
+)
+
+var ErrNotFound = exec.ErrNotFound // the very same error value as os/exec's, so comparisons against either one match
+
+type (
+	Cmd       = exec.Cmd       // alias of the os/exec type, not a new type
+	Error     = exec.Error     // alias of the os/exec type, not a new type
+	ExitError = exec.ExitError // alias of the os/exec type, not a new type
+)
+
+func relError(file, path string) error { // file: the name that was looked up; path: the non-absolute result it resolved to
+	return fmt.Errorf("%s resolves to executable relative to current directory (.%c%s)", file, filepath.Separator, path)
+}
+
+func LookPath(file string) (string, error) {
+	resolved, err := exec.LookPath(file)
+	if err != nil {
+		return "", err
+	}
+	if bare := filepath.Base(file) == file; !bare || filepath.IsAbs(resolved) {
+		return resolved, nil // file was not a bare name, or the lookup produced an absolute path
+	}
+	return "", relError(file, resolved)
+}
+
+func fixCmd(name string, cmd *exec.Cmd) {
+	if filepath.Base(name) != name || filepath.IsAbs(cmd.Path) {
+		return // name was not a bare binary name, or cmd.Path is already absolute
+	}
+	// A bare binary name was resolved by exec.LookPath to a path that is not
+	// absolute. Store an error in the unexported cmd.lookPathErr field (unless
+	// one is already there) and clear cmd.Path so that the command cannot run.
+	field := reflect.ValueOf(cmd).Elem().FieldByName("lookPathErr")
+	if errp := (*error)(unsafe.Pointer(field.Addr().Pointer())); *errp == nil {
+		*errp = relError(name, cmd.Path)
+	}
+	cmd.Path = ""
+}
+
+// CommandContext is exec.CommandContext followed by the fixCmd check on the result.
+func CommandContext(ctx context.Context, name string, arg ...string) *exec.Cmd {
+	c := exec.CommandContext(ctx, name, arg...)
+	fixCmd(name, c)
+	return c
+}
+
+func Command(name string, arg ...string) *exec.Cmd {
+	c := exec.Command(name, arg...)
+	fixCmd(name, c) // disarms c when a bare name resolved to a non-absolute path
+	return c
+}
diff --git a/src/internal/execabs/execabs_test.go b/src/internal/execabs/execabs_test.go
new file mode 100644
index 0000000..97a3f39
--- /dev/null
+++ b/src/internal/execabs/execabs_test.go
@@ -0,0 +1,103 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package execabs
+
+import (
+	"context"
+	"fmt"
+	"internal/testenv"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"testing"
+)
+
+func TestFixCmd(t *testing.T) {
+	c := &exec.Cmd{Path: "hello"}
+	if fixCmd("hello", c); c.Path != "" {
+		t.Error("fixCmd didn't clear cmd.Path")
+	}
+	want := fmt.Sprintf("hello resolves to executable relative to current directory (.%chello)", filepath.Separator)
+	switch err := c.Run(); { // Run is expected to report the error fixCmd stored
+	case err == nil:
+		t.Fatal("Command.Run didn't fail")
+	case err.Error() != want:
+		t.Fatalf("Command.Run returned unexpected error: want %q, got %q", want, err.Error())
+	}
+}
+
+func TestCommand(t *testing.T) {
+	testenv.MustHaveExec(t)
+
+	for _, cmd := range []func(string) *Cmd{ // the same scenario is run once per constructor
+		func(s string) *Cmd { return Command(s) },
+		func(s string) *Cmd { return CommandContext(context.Background(), s) },
+	} {
+		tmpDir := t.TempDir()
+		executable := "execabs-test"
+		if runtime.GOOS == "windows" {
+			executable += ".exe"
+		}
+		if err := os.WriteFile(filepath.Join(tmpDir, executable), []byte{1, 2, 3}, 0111); err != nil { // placeholder bytes; mode 0111 sets only the execute bits
+			t.Fatalf("os.WriteFile failed: %s", err)
+		}
+		cwd, err := os.Getwd()
+		if err != nil {
+			t.Fatalf("os.Getwd failed: %s", err)
+		}
+		defer os.Chdir(cwd) // deferred inside the loop, so it runs when TestCommand returns, not at the end of the iteration
+		if err = os.Chdir(tmpDir); err != nil {
+			t.Fatalf("os.Chdir failed: %s", err)
+		}
+		if runtime.GOOS != "windows" {
+			// add "." to PATH so that exec.LookPath looks in the current directory on
+			// non-windows platforms as well
+			origPath := os.Getenv("PATH")
+			defer os.Setenv("PATH", origPath) // also runs at function return; on the second iteration origPath already starts with "."
+			os.Setenv("PATH", fmt.Sprintf(".:%s", origPath))
+		}
+		expectedErr := fmt.Sprintf("execabs-test resolves to executable relative to current directory (.%c%s)", filepath.Separator, executable)
+		if err = cmd("execabs-test").Run(); err == nil {
+			t.Fatalf("Command.Run didn't fail when exec.LookPath returned a relative path")
+		} else if err.Error() != expectedErr {
+			t.Errorf("Command.Run returned unexpected error: want %q, got %q", expectedErr, err.Error())
+		}
+	}
+}
+
+// TestLookPath checks that LookPath rejects a bare name that resolves to an executable in the current directory.
+func TestLookPath(t *testing.T) {
+	testenv.MustHaveExec(t)
+	tmpDir := t.TempDir()
+	executable := "execabs-test"
+	if runtime.GOOS == "windows" {
+		executable += ".exe"
+	}
+	if err := os.WriteFile(filepath.Join(tmpDir, executable), []byte{1, 2, 3}, 0111); err != nil { // placeholder bytes; mode 0111 sets only the execute bits
+		t.Fatalf("os.WriteFile failed: %s", err)
+	}
+	cwd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("os.Getwd failed: %s", err)
+	}
+	defer os.Chdir(cwd) // restore the working directory when the test returns
+	if err = os.Chdir(tmpDir); err != nil {
+		t.Fatalf("os.Chdir failed: %s", err)
+	}
+	if runtime.GOOS != "windows" {
+		// add "." to PATH so that exec.LookPath looks in the current directory on
+		// non-windows platforms as well
+		origPath := os.Getenv("PATH")
+		defer os.Setenv("PATH", origPath)
+		os.Setenv("PATH", fmt.Sprintf(".:%s", origPath))
+	}
+	expectedErr := fmt.Sprintf("execabs-test resolves to executable relative to current directory (.%c%s)", filepath.Separator, executable)
+	if _, err := LookPath("execabs-test"); err == nil {
+		t.Fatalf("LookPath didn't fail when finding a relative path") // the lookup here yields a relative path, which must be rejected
+	} else if err.Error() != expectedErr {
+		t.Errorf("LookPath returned unexpected error: want %q, got %q", expectedErr, err.Error())
+	}
+}
diff --git a/src/internal/fmtsort/export_test.go b/src/internal/fmtsort/export_test.go
new file mode 100644
index 0000000..25cbb5d
--- /dev/null
+++ b/src/internal/fmtsort/export_test.go
@@ -0,0 +1,11 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fmtsort
+
+import "reflect"
+
+func Compare(a, b reflect.Value) int {
+	return compare(a, b) // exported pass-through so the external fmtsort_test package can call the unexported compare
+}
diff --git a/src/internal/fmtsort/sort.go b/src/internal/fmtsort/sort.go
new file mode 100644
index 0000000..7127ba6
--- /dev/null
+++ b/src/internal/fmtsort/sort.go
@@ -0,0 +1,220 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fmtsort provides a general stable ordering mechanism
+// for maps, on behalf of the fmt and text/template packages.
+// It is not guaranteed to be efficient and works only for types
+// that are valid map keys.
+package fmtsort
+
+import (
+	"reflect"
+	"sort"
+)
+
+// Note: Throughout this package we avoid calling reflect.Value.Interface as
+// it is not always legal to do so and it's easier to avoid the issue than to face it.
+
+// SortedMap represents a map's keys and values. The keys and values are
+// aligned in index order: Value[i] is the value in the map corresponding to Key[i].
+type SortedMap struct {
+	Key   []reflect.Value
+	Value []reflect.Value
+}
+
+func (m *SortedMap) Len() int           { return len(m.Key) }
+func (m *SortedMap) Less(i, j int) bool { return 0 > compare(m.Key[i], m.Key[j]) }
+func (m *SortedMap) Swap(i, j int) {
+	m.Key[i], m.Key[j] = m.Key[j], m.Key[i]
+	m.Value[i], m.Value[j] = m.Value[j], m.Value[i] // values move in lockstep with their keys
+}
+
+// Sort accepts a map and returns a SortedMap that has the same keys and
+// values but in a stable sorted order according to the keys, modulo issues
+// raised by unorderable key values such as NaNs.
+//
+// The ordering rules are more general than with Go's < operator:
+//
+//  - when applicable, nil compares low
+//  - ints, floats, and strings order by <
+//  - NaN compares less than non-NaN floats
+//  - bool compares false before true
+//  - complex compares real, then imag
+//  - pointers compare by machine address
+//  - channel values compare by machine address
+//  - structs compare each field in turn
+//  - arrays compare each element in turn.
+//    Otherwise identical arrays compare by length.
+//  - interface values compare first by reflect.Type describing the concrete type
+//    and then by concrete value as described in the previous rules.
+//
+func Sort(mapValue reflect.Value) *SortedMap {
+	if kind := mapValue.Type().Kind(); kind != reflect.Map {
+		return nil
+	}
+
+	// Entries are appended to slices that start empty (Len is only used as
+	// a capacity hint), which keeps this code from panicking if the map is
+	// updated concurrently. Yelling loudly about that misuse is the
+	// runtime's responsibility. See issue 33275.
+	size := mapValue.Len()
+	result := &SortedMap{
+		Key:   make([]reflect.Value, 0, size),
+		Value: make([]reflect.Value, 0, size),
+	}
+	for it := mapValue.MapRange(); it.Next(); {
+		result.Key = append(result.Key, it.Key())
+		result.Value = append(result.Value, it.Value())
+	}
+
+	sort.Stable(result)
+	return result
+}
+
+// compare compares two values of the same type. It returns -1, 0, 1
+// according to whether a > b (1), a == b (0), or a < b (-1).
+// If the types differ, it returns -1.
+// See the comment on Sort for the comparison rules.
+func compare(aVal, bVal reflect.Value) int {
+	typ := aVal.Type()
+	if typ != bVal.Type() {
+		return -1 // No good answer possible, but don't return 0: they're not equal.
+	}
+	switch aVal.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		// Every signed width is read through Int, so one branch covers them all.
+		x, y := aVal.Int(), bVal.Int()
+		if x < y {
+			return -1
+		}
+		if x > y {
+			return 1
+		}
+		return 0
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		// Every unsigned width is read through Uint, so one branch covers them all.
+		x, y := aVal.Uint(), bVal.Uint()
+		if x < y {
+			return -1
+		}
+		if x > y {
+			return 1
+		}
+		return 0
+	case reflect.String:
+		// Strings use Go's built-in ordering.
+		x, y := aVal.String(), bVal.String()
+		if x < y {
+			return -1
+		}
+		if x > y {
+			return 1
+		}
+		return 0
+	case reflect.Float32, reflect.Float64:
+		return floatCompare(aVal.Float(), bVal.Float())
+	case reflect.Complex64, reflect.Complex128:
+		x, y := aVal.Complex(), bVal.Complex()
+		if c := floatCompare(real(x), real(y)); c != 0 {
+			return c // the real parts decide unless they tie
+		}
+		return floatCompare(imag(x), imag(y))
+	case reflect.Bool:
+		// false orders before true.
+		x, y := aVal.Bool(), bVal.Bool()
+		if x == y {
+			return 0
+		}
+		if x {
+			return 1
+		}
+		return -1
+	case reflect.Ptr, reflect.UnsafePointer:
+		// Pointers order by the address that Pointer reports.
+		x, y := aVal.Pointer(), bVal.Pointer()
+		if x < y {
+			return -1
+		}
+		if x > y {
+			return 1
+		}
+		return 0
+	case reflect.Chan:
+		if c, ok := nilCompare(aVal, bVal); ok {
+			return c
+		}
+		// Both channels are non-nil here: order them by address.
+		x, y := aVal.Pointer(), bVal.Pointer()
+		if x < y {
+			return -1
+		}
+		if x > y {
+			return 1
+		}
+		return 0
+	case reflect.Struct:
+		for i, n := 0, aVal.NumField(); i < n; i++ {
+			if c := compare(aVal.Field(i), bVal.Field(i)); c != 0 {
+				return c // first differing field decides
+			}
+		}
+		return 0
+	case reflect.Array:
+		for i, n := 0, aVal.Len(); i < n; i++ {
+			if c := compare(aVal.Index(i), bVal.Index(i)); c != 0 {
+				return c // first differing element decides
+			}
+		}
+		return 0
+	case reflect.Interface:
+		if c, ok := nilCompare(aVal, bVal); ok {
+			return c
+		}
+		aElem, bElem := aVal.Elem(), bVal.Elem()
+		if c := compare(reflect.ValueOf(aElem.Type()), reflect.ValueOf(bElem.Type())); c != 0 {
+			return c // different concrete types: ordered by comparing the reflect.Type values
+		}
+		return compare(aElem, bElem)
+	default:
+		// Certain types cannot appear as keys (maps, funcs, slices), but be explicit.
+		panic("bad type in compare: " + typ.String())
+	}
+}
+
+// nilCompare checks whether either value is nil. If not, the boolean is false.
+// If either value is nil, the boolean is true and the integer is the comparison
+// value. The comparison is defined to be 0 if both are nil, otherwise the one
+// nil value compares low. Both arguments must represent a chan, func,
+// interface, map, pointer, or slice.
+func nilCompare(aVal, bVal reflect.Value) (int, bool) {
+	switch aNil, bNil := aVal.IsNil(), bVal.IsNil(); {
+	case aNil && bNil:
+		return 0, true
+	case aNil:
+		return -1, true // the lone nil value compares low
+	case bNil:
+		return 1, true
+	default:
+		return 0, false // neither is nil: the caller has to compare further
+	}
+}
+
+// floatCompare orders two floating-point values as -1, 0 or 1, with NaNs
+// comparing low.
+func floatCompare(a, b float64) int {
+	if isNaN(a) {
+		return -1 // No good answer if b is a NaN so don't bother checking.
+	}
+	if isNaN(b) || a > b {
+		return 1
+	}
+	if a < b {
+		return -1
+	}
+	return 0
+}
+
+func isNaN(a float64) bool {
+	return !(a == a) // a NaN is the only float64 that is not equal to itself
+}
diff --git a/src/internal/fmtsort/sort_test.go b/src/internal/fmtsort/sort_test.go
new file mode 100644
index 0000000..5c4db1c
--- /dev/null
+++ b/src/internal/fmtsort/sort_test.go
@@ -0,0 +1,268 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fmtsort_test
+
+import (
+	"fmt"
+	"internal/fmtsort"
+	"math"
+	"reflect"
+	"strings"
+	"testing"
+	"unsafe"
+)
+
+var compareTests = [][]reflect.Value{ // each row: values of one type, listed in the ascending order TestCompare expects
+	ct(reflect.TypeOf(int(0)), -1, 0, 1),
+	ct(reflect.TypeOf(int8(0)), -1, 0, 1),
+	ct(reflect.TypeOf(int16(0)), -1, 0, 1),
+	ct(reflect.TypeOf(int32(0)), -1, 0, 1),
+	ct(reflect.TypeOf(int64(0)), -1, 0, 1),
+	ct(reflect.TypeOf(uint(0)), 0, 1, 5),
+	ct(reflect.TypeOf(uint8(0)), 0, 1, 5),
+	ct(reflect.TypeOf(uint16(0)), 0, 1, 5),
+	ct(reflect.TypeOf(uint32(0)), 0, 1, 5),
+	ct(reflect.TypeOf(uint64(0)), 0, 1, 5),
+	ct(reflect.TypeOf(uintptr(0)), 0, 1, 5),
+	ct(reflect.TypeOf(string("")), "", "a", "ab"),
+	ct(reflect.TypeOf(float32(0)), math.NaN(), math.Inf(-1), -1e10, 0, 1e10, math.Inf(1)),
+	ct(reflect.TypeOf(float64(0)), math.NaN(), math.Inf(-1), -1e10, 0, 1e10, math.Inf(1)),
+	ct(reflect.TypeOf(complex64(0+1i)), -1-1i, -1+0i, -1+1i, 0-1i, 0+0i, 0+1i, 1-1i, 1+0i, 1+1i),
+	ct(reflect.TypeOf(complex128(0+1i)), -1-1i, -1+0i, -1+1i, 0-1i, 0+0i, 0+1i, 1-1i, 1+0i, 1+1i),
+	ct(reflect.TypeOf(false), false, true),
+	ct(reflect.TypeOf(&ints[0]), &ints[0], &ints[1], &ints[2]),
+	ct(reflect.TypeOf(unsafe.Pointer(&ints[0])), unsafe.Pointer(&ints[0]), unsafe.Pointer(&ints[1]), unsafe.Pointer(&ints[2])),
+	ct(reflect.TypeOf(chans[0]), chans[0], chans[1], chans[2]), // NOTE(review): assumes the three channels have ascending addresses — confirm
+	ct(reflect.TypeOf(toy{}), toy{0, 1}, toy{0, 2}, toy{1, -1}, toy{1, 1}),
+	ct(reflect.TypeOf([2]int{}), [2]int{1, 1}, [2]int{1, 2}, [2]int{2, 0}),
+	ct(reflect.TypeOf((*interface{})(nil)).Elem(), iFace, 1, 2, 3), // a real interface type: TypeOf(interface{}(0)) is int and would only re-test ints
+}
+
+var iFace interface{} // never assigned in this file, so it stays a nil interface; used as the first entry of a compareTests row
+
+// ct builds one row of test values: every arg converted to typ, in order.
+func ct(typ reflect.Type, args ...interface{}) []reflect.Value {
+	out := make([]reflect.Value, 0, len(args))
+	for _, arg := range args {
+		// A nil interface argument gives an invalid Value; make it a typed nil.
+		if rv := reflect.ValueOf(arg); rv.IsValid() {
+			out = append(out, rv.Convert(typ))
+		} else {
+			out = append(out, reflect.Zero(typ))
+		}
+	}
+	return out
+}
+
+func TestCompare(t *testing.T) {
+	// Each row of compareTests is listed in ascending order, so the result
+	// expected for a pair follows from the relative positions of its two
+	// members within the row.
+	for _, row := range compareTests {
+		for i, lhs := range row {
+			for j, rhs := range row {
+				want := 0
+				switch kind := lhs.Kind(); {
+				case i < j:
+					want = -1
+				case i > j:
+					want = 1
+				case kind != reflect.Float32 && kind != reflect.Float64:
+					// Same position and not a float: plain equality.
+				case math.IsNaN(lhs.Float()):
+					want = -1 // NaNs are tricky: one compares low even against itself.
+				}
+				if got := fmtsort.Compare(lhs, rhs); got != want {
+					t.Errorf("%s: compare(%v,%v)=%d; expect %d", lhs.Type(), lhs, rhs, got, want)
+				}
+			}
+		}
+	}
+}
+
+type sortTest struct {
+	data  interface{} // Always a map.
+	print string      // Expected output of sprint(data): space-separated key:value pairs in sorted key order.
+}
+
+var sortTests = []sortTest{ // each entry pairs a map with the exact string TestOrder expects sprint to produce for it
+	{
+		map[int]string{7: "bar", -3: "foo"},
+		"-3:foo 7:bar",
+	},
+	{
+		map[uint8]string{7: "bar", 3: "foo"},
+		"3:foo 7:bar",
+	},
+	{
+		map[string]string{"7": "bar", "3": "foo"},
+		"3:foo 7:bar",
+	},
+	{
+		map[float64]string{7: "bar", -3: "foo", math.NaN(): "nan", math.Inf(0): "inf"},
+		"NaN:nan -3:foo 7:bar +Inf:inf",
+	},
+	{
+		map[complex128]string{7 + 2i: "bar2", 7 + 1i: "bar", -3: "foo", complex(math.NaN(), 0i): "nan", complex(math.Inf(0), 0i): "inf"},
+		"(NaN+0i):nan (-3+0i):foo (7+1i):bar (7+2i):bar2 (+Inf+0i):inf",
+	},
+	{
+		map[bool]string{true: "true", false: "false"},
+		"false:false true:true",
+	},
+	{
+		chanMap(), // NOTE(review): the expected order assumes chans[0..2] have ascending addresses; nothing in this file enforces that — confirm
+		"CHAN0:0 CHAN1:1 CHAN2:2",
+	},
+	{
+		pointerMap(), // keys are addresses of elements of the ints array, which ascend with the index
+		"PTR0:0 PTR1:1 PTR2:2",
+	},
+	{
+		unsafePointerMap(), // same element addresses as above, typed as unsafe.Pointer
+		"UNSAFEPTR0:0 UNSAFEPTR1:1 UNSAFEPTR2:2",
+	},
+	{
+		map[toy]string{{7, 2}: "72", {7, 1}: "71", {3, 4}: "34"},
+		"{3 4}:34 {7 1}:71 {7 2}:72",
+	},
+	{
+		map[[2]int]string{{7, 2}: "72", {7, 1}: "71", {3, 4}: "34"},
+		"[3 4]:34 [7 1]:71 [7 2]:72",
+	},
+}
+
+// sprint renders data's entries as space-separated "key:value" pairs in the
+// order produced by fmtsort.Sort, or "nil" when Sort returns nil.
+func sprint(data interface{}) string {
+	sorted := fmtsort.Sort(reflect.ValueOf(data))
+	if sorted == nil {
+		return "nil"
+	}
+	var out strings.Builder
+	sep := ""
+	for i := range sorted.Key {
+		out.WriteString(sep + sprintKey(sorted.Key[i]) + ":")
+		out.WriteString(fmt.Sprint(sorted.Value[i]))
+		sep = " "
+	}
+	return out.String()
+}
+
+// sprintKey formats a reflect.Value but gives reproducible values for some
+// problematic types such as pointers. Note that it only does special handling
+// for the troublesome types used in the test cases; it is not a general
+// printer.
+func sprintKey(key reflect.Value) string {
+	// Address-like keys are reported by their index in ints/chans, not by value.
+	switch key.Type().String() {
+	case "chan int":
+		ch := key.Interface().(chan int)
+		for idx := range chans {
+			if chans[idx] == ch {
+				return fmt.Sprintf("CHAN%d", idx)
+			}
+		}
+		return "CHAN???"
+	case "*int":
+		p := key.Interface().(*int)
+		for idx := range ints {
+			if &ints[idx] == p {
+				return fmt.Sprintf("PTR%d", idx)
+			}
+		}
+		return "PTR???"
+	case "unsafe.Pointer":
+		p := key.Interface().(unsafe.Pointer)
+		for idx := range ints {
+			if unsafe.Pointer(&ints[idx]) == p {
+				return fmt.Sprintf("UNSAFEPTR%d", idx)
+			}
+		}
+		return "UNSAFEPTR???"
+	}
+	return fmt.Sprint(key)
+}
+
+var (
+	ints  [3]int // element addresses serve as the pointer and unsafe.Pointer keys in the tests
+	chans = [3]chan int{make(chan int), make(chan int), make(chan int)} // NOTE(review): tests expect these in ascending address order; not enforced here — confirm
+)
+
+func pointerMap() map[*int]string {
+	out := map[*int]string{}
+	for idx := len(ints) - 1; idx >= 0; idx-- { // keys are inserted from the last element down
+		out[&ints[idx]] = fmt.Sprint(idx)
+	}
+	return out
+}
+
+func unsafePointerMap() map[unsafe.Pointer]string {
+	out := map[unsafe.Pointer]string{}
+	for idx := len(ints) - 1; idx >= 0; idx-- { // keys are inserted from the last element down
+		out[unsafe.Pointer(&ints[idx])] = fmt.Sprint(idx)
+	}
+	return out
+}
+
+func chanMap() map[chan int]string {
+	out := map[chan int]string{}
+	for idx := len(chans) - 1; idx >= 0; idx-- { // keys are inserted from the last element down
+		out[chans[idx]] = fmt.Sprint(idx)
+	}
+	return out
+}
+
+type toy struct {
+	A int // Exported.
+	b int // Unexported; the expected orderings in the tables still depend on it (e.g. toy{0, 1} before toy{0, 2}).
+}
+
+// TestOrder checks sprint's rendering of every map in sortTests.
+func TestOrder(t *testing.T) {
+	for _, tc := range sortTests {
+		if got, want := sprint(tc.data), tc.print; got != want {
+			t.Errorf("%s: got %q, want %q", reflect.TypeOf(tc.data), got, want)
+		}
+	}
+}
+
+func TestInterface(t *testing.T) {
+	// Keys of several concrete types share one map. Sorting groups them by
+	// type and orders each group by value, but the relative order of the
+	// groups themselves is unspecified, so only the presence of each
+	// sorted per-type run is checked.
+	mixed := map[interface{}]string{
+		[2]int{1, 0}:             "",
+		[2]int{0, 1}:             "",
+		true:                     "",
+		false:                    "",
+		3.1:                      "",
+		2.1:                      "",
+		1.1:                      "",
+		math.NaN():               "",
+		3:                        "",
+		2:                        "",
+		1:                        "",
+		"c":                      "",
+		"b":                      "",
+		"a":                      "",
+		struct{ x, y int }{1, 0}: "",
+		struct{ x, y int }{0, 1}: "",
+	}
+	rendered := sprint(mixed)
+	for _, run := range []string{
+		"NaN: 1.1: 2.1: 3.1:", // float64
+		"false: true:",        // bool
+		"1: 2: 3:",            // int
+		"a: b: c:",            // string
+		"[0 1]: [1 0]:",       // [2]int
+		"{0 1}: {1 0}:",       // struct{ x int; y int }
+	} {
+		if !strings.Contains(rendered, run) {
+			t.Errorf("sorted map should contain %q", run)
+		}
+	}
+}
diff --git a/src/internal/goexperiment/exp_fieldtrack_off.go b/src/internal/goexperiment/exp_fieldtrack_off.go
new file mode 100644
index 0000000..e5e1326
--- /dev/null
+++ b/src/internal/goexperiment/exp_fieldtrack_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.fieldtrack
+// +build !goexperiment.fieldtrack
+
+package goexperiment
+
+const FieldTrack = false
+const FieldTrackInt = 0
diff --git a/src/internal/goexperiment/exp_fieldtrack_on.go b/src/internal/goexperiment/exp_fieldtrack_on.go
new file mode 100644
index 0000000..0d8c447
--- /dev/null
+++ b/src/internal/goexperiment/exp_fieldtrack_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.fieldtrack
+// +build goexperiment.fieldtrack
+
+package goexperiment
+
+const FieldTrack = true
+const FieldTrackInt = 1
diff --git a/src/internal/goexperiment/exp_preemptibleloops_off.go b/src/internal/goexperiment/exp_preemptibleloops_off.go
new file mode 100644
index 0000000..7a26088
--- /dev/null
+++ b/src/internal/goexperiment/exp_preemptibleloops_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.preemptibleloops
+// +build !goexperiment.preemptibleloops
+
+package goexperiment
+
+const PreemptibleLoops = false
+const PreemptibleLoopsInt = 0
diff --git a/src/internal/goexperiment/exp_preemptibleloops_on.go b/src/internal/goexperiment/exp_preemptibleloops_on.go
new file mode 100644
index 0000000..a9ca28c
--- /dev/null
+++ b/src/internal/goexperiment/exp_preemptibleloops_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.preemptibleloops
+// +build goexperiment.preemptibleloops
+
+package goexperiment
+
+const PreemptibleLoops = true
+const PreemptibleLoopsInt = 1
diff --git a/src/internal/goexperiment/exp_regabi_off.go b/src/internal/goexperiment/exp_regabi_off.go
new file mode 100644
index 0000000..5d88238
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabi_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.regabi
+// +build !goexperiment.regabi
+
+package goexperiment
+
+const Regabi = false
+const RegabiInt = 0
diff --git a/src/internal/goexperiment/exp_regabi_on.go b/src/internal/goexperiment/exp_regabi_on.go
new file mode 100644
index 0000000..c08d58e
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabi_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.regabi
+// +build goexperiment.regabi
+
+package goexperiment
+
+const Regabi = true
+const RegabiInt = 1
diff --git a/src/internal/goexperiment/exp_regabiargs_off.go b/src/internal/goexperiment/exp_regabiargs_off.go
new file mode 100644
index 0000000..31a139b
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabiargs_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.regabiargs
+// +build !goexperiment.regabiargs
+
+package goexperiment
+
+const RegabiArgs = false
+const RegabiArgsInt = 0
diff --git a/src/internal/goexperiment/exp_regabiargs_on.go b/src/internal/goexperiment/exp_regabiargs_on.go
new file mode 100644
index 0000000..9b26f3c
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabiargs_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.regabiargs
+// +build goexperiment.regabiargs
+
+package goexperiment
+
+const RegabiArgs = true
+const RegabiArgsInt = 1
diff --git a/src/internal/goexperiment/exp_regabidefer_off.go b/src/internal/goexperiment/exp_regabidefer_off.go
new file mode 100644
index 0000000..b47c0c2
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabidefer_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.regabidefer
+// +build !goexperiment.regabidefer
+
+package goexperiment
+
+const RegabiDefer = false
+const RegabiDeferInt = 0
diff --git a/src/internal/goexperiment/exp_regabidefer_on.go b/src/internal/goexperiment/exp_regabidefer_on.go
new file mode 100644
index 0000000..bbf2f6c
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabidefer_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.regabidefer
+// +build goexperiment.regabidefer
+
+package goexperiment
+
+const RegabiDefer = true
+const RegabiDeferInt = 1
diff --git a/src/internal/goexperiment/exp_regabig_off.go b/src/internal/goexperiment/exp_regabig_off.go
new file mode 100644
index 0000000..1b37d45
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabig_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.regabig
+// +build !goexperiment.regabig
+
+package goexperiment
+
+const RegabiG = false
+const RegabiGInt = 0
diff --git a/src/internal/goexperiment/exp_regabig_on.go b/src/internal/goexperiment/exp_regabig_on.go
new file mode 100644
index 0000000..7e5b162
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabig_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.regabig
+// +build goexperiment.regabig
+
+package goexperiment
+
+const RegabiG = true
+const RegabiGInt = 1
diff --git a/src/internal/goexperiment/exp_regabireflect_off.go b/src/internal/goexperiment/exp_regabireflect_off.go
new file mode 100644
index 0000000..515f4a5
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabireflect_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.regabireflect
+// +build !goexperiment.regabireflect
+
+package goexperiment
+
+const RegabiReflect = false
+const RegabiReflectInt = 0
diff --git a/src/internal/goexperiment/exp_regabireflect_on.go b/src/internal/goexperiment/exp_regabireflect_on.go
new file mode 100644
index 0000000..e8a3e9c
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabireflect_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.regabireflect
+// +build goexperiment.regabireflect
+
+package goexperiment
+
+const RegabiReflect = true
+const RegabiReflectInt = 1
diff --git a/src/internal/goexperiment/exp_regabiwrappers_off.go b/src/internal/goexperiment/exp_regabiwrappers_off.go
new file mode 100644
index 0000000..bfa0fa3
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabiwrappers_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.regabiwrappers
+// +build !goexperiment.regabiwrappers
+
+package goexperiment
+
+const RegabiWrappers = false
+const RegabiWrappersInt = 0
diff --git a/src/internal/goexperiment/exp_regabiwrappers_on.go b/src/internal/goexperiment/exp_regabiwrappers_on.go
new file mode 100644
index 0000000..11ffffb
--- /dev/null
+++ b/src/internal/goexperiment/exp_regabiwrappers_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.regabiwrappers
+// +build goexperiment.regabiwrappers
+
+package goexperiment
+
+const RegabiWrappers = true
+const RegabiWrappersInt = 1
diff --git a/src/internal/goexperiment/exp_staticlockranking_off.go b/src/internal/goexperiment/exp_staticlockranking_off.go
new file mode 100644
index 0000000..3d546c0
--- /dev/null
+++ b/src/internal/goexperiment/exp_staticlockranking_off.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build !goexperiment.staticlockranking
+// +build !goexperiment.staticlockranking
+
+package goexperiment
+
+const StaticLockRanking = false
+const StaticLockRankingInt = 0
diff --git a/src/internal/goexperiment/exp_staticlockranking_on.go b/src/internal/goexperiment/exp_staticlockranking_on.go
new file mode 100644
index 0000000..78188fb
--- /dev/null
+++ b/src/internal/goexperiment/exp_staticlockranking_on.go
@@ -0,0 +1,9 @@
+// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build goexperiment.staticlockranking
+// +build goexperiment.staticlockranking
+
+package goexperiment
+
+const StaticLockRanking = true
+const StaticLockRankingInt = 1
diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go
new file mode 100644
index 0000000..cd4c178
--- /dev/null
+++ b/src/internal/goexperiment/flags.go
@@ -0,0 +1,93 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package goexperiment implements support for toolchain experiments.
+//
+// Toolchain experiments are controlled by the GOEXPERIMENT
+// environment variable. GOEXPERIMENT is a comma-separated list of
+// experiment names. GOEXPERIMENT can be set at make.bash time, which
+// sets the default experiments for binaries built with the tool
+// chain; or it can be set at build time. GOEXPERIMENT can also be set
+// to "none", which disables any experiments that were enabled at
+// make.bash time.
+//
+// Experiments are exposed to the build in the following ways:
+//
+// - Build tag goexperiment.x is set if experiment x (lower case) is
+// enabled.
+//
+// - For each experiment x (in camel case), this package contains a
+// boolean constant x and an integer constant xInt.
+//
+// - In runtime assembly, the macro GOEXPERIMENT_x is defined if
+// experiment x (lower case) is enabled.
+//
+// In the toolchain, the set of experiments enabled for the current
+// build should be accessed via buildcfg.Experiment.
+//
+// The set of experiments is included in the output of runtime.Version()
+// and "go version <binary>" if it differs from the default experiments.
+//
+// For the set of experiments supported by the current toolchain, see
+// "go doc goexperiment.Flags".
+//
+// Note that this package defines the set of experiments (in Flags)
+// and records the experiments that were enabled when the package
+// was compiled (as boolean and integer constants).
+//
+// Note especially that this package does not itself change behavior
+// at run time based on the GOEXPERIMENT variable.
+// The code used in builds to interpret the GOEXPERIMENT variable
+// is in the separate package internal/buildcfg.
+package goexperiment
+
+//go:generate go run mkconsts.go
+
+// Flags is the set of experiments that can be enabled or disabled in
+// the current toolchain.
+//
+// When specified in the GOEXPERIMENT environment variable or as build
+// tags, experiments use the strings.ToLower of their field name.
+//
+// For the baseline experimental configuration, see
+// buildcfg.experimentBaseline (package internal/buildcfg).
+//
+// If you change this struct definition, run "go generate".
+type Flags struct {
+	FieldTrack        bool
+	PreemptibleLoops  bool
+	StaticLockRanking bool
+
+	// Regabi is split into several sub-experiments that can be
+	// enabled individually. Not all combinations work.
+	// The "regabi" GOEXPERIMENT is an alias for all "working"
+	// subexperiments.
+
+	// RegabiWrappers enables ABI wrappers for calling between
+	// ABI0 and ABIInternal functions. Without this, the ABIs are
+	// assumed to be identical so cross-ABI calls are direct.
+	RegabiWrappers bool
+	// RegabiG enables dedicated G and zero registers in
+	// ABIInternal.
+	//
+	// Requires wrappers because it makes the ABIs incompatible.
+	RegabiG bool
+	// RegabiReflect enables the register-passing paths in
+	// reflection calls. This is also gated by intArgRegs in
+	// reflect and runtime (which are disabled by default) so it
+	// can be used in targeted tests.
+	RegabiReflect bool
+	// RegabiDefer enables desugaring defer and go calls
+	// into argument-less closures.
+	RegabiDefer bool
+	// RegabiArgs enables register arguments/results in all
+	// compiled Go functions.
+	//
+	// Requires wrappers (to do ABI translation), g (because
+	// runtime assembly that's been ported to ABIInternal uses the
+	// G register), reflect (so reflection calls use registers),
+	// and defer (because the runtime doesn't support passing
+	// register arguments to defer/go).
+	RegabiArgs bool
+}
diff --git a/src/internal/goexperiment/mkconsts.go b/src/internal/goexperiment/mkconsts.go
new file mode 100644
index 0000000..204ca9d
--- /dev/null
+++ b/src/internal/goexperiment/mkconsts.go
@@ -0,0 +1,74 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+// +build ignore
+
+// mkconsts generates const definition files for each GOEXPERIMENT.
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"internal/goexperiment"
+	"log"
+	"os"
+	"reflect"
+	"strings"
+)
+
+func main() {
+	// Delete existing experiment constant files.
+	ents, err := os.ReadDir(".")
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, ent := range ents {
+		name := ent.Name()
+		if !strings.HasPrefix(name, "exp_") {
+			continue
+		}
+		// Check that this is definitely a generated file.
+		data, err := os.ReadFile(name)
+		if err != nil {
+			log.Fatalf("reading %s: %v", name, err)
+		}
+		if !bytes.Contains(data, []byte("Code generated by mkconsts")) {
+			log.Fatalf("%s: expected generated file", name)
+		}
+		if err := os.Remove(name); err != nil {
+			log.Fatal(err)
+		}
+	}
+
+	// Generate new experiment constant files.
+	rt := reflect.TypeOf(&goexperiment.Flags{}).Elem()
+	for i := 0; i < rt.NumField(); i++ {
+		f := rt.Field(i).Name
+		buildTag := "goexperiment." + strings.ToLower(f)
+		for _, val := range []bool{false, true} {
+			name := fmt.Sprintf("exp_%s_%s.go", strings.ToLower(f), pick(val, "off", "on"))
+			data := fmt.Sprintf(`// Code generated by mkconsts.go. DO NOT EDIT.
+
+//go:build %s%s
+// +build %s%s
+
+package goexperiment
+
+const %s = %v
+const %sInt = %s
+`, pick(val, "!", ""), buildTag, pick(val, "!", ""), buildTag, f, val, f, pick(val, "0", "1"))
+			if err := os.WriteFile(name, []byte(data), 0666); err != nil {
+				log.Fatalf("writing %s: %v", name, err)
+			}
+		}
+	}
+}
+
+func pick(v bool, f, t string) string {
+	if v {
+		return t
+	}
+	return f
+}
diff --git a/src/internal/goroot/gc.go b/src/internal/goroot/gc.go
new file mode 100644
index 0000000..2338b78
--- /dev/null
+++ b/src/internal/goroot/gc.go
@@ -0,0 +1,132 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+// +build gc
+
+package goroot
+
+import (
+	exec "internal/execabs"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+)
+
+// IsStandardPackage reports whether path is a standard package,
+// given goroot and compiler.
+func IsStandardPackage(goroot, compiler, path string) bool {
+	switch compiler {
+	case "gc":
+		dir := filepath.Join(goroot, "src", path)
+		_, err := os.Stat(dir)
+		return err == nil
+	case "gccgo":
+		return gccgoSearch.isStandard(path)
+	default:
+		panic("unknown compiler " + compiler)
+	}
+}
+
+// gccgoDirs holds the gccgo search directories.
+type gccgoDirs struct {
+	once sync.Once
+	dirs []string
+}
+
+// gccgoSearch is used to check whether a gccgo package exists in the
+// standard library.
+var gccgoSearch gccgoDirs
+
+// init finds the gccgo search directories. If this fails it leaves dirs == nil.
+func (gd *gccgoDirs) init() {
+	gccgo := os.Getenv("GCCGO")
+	if gccgo == "" {
+		gccgo = "gccgo"
+	}
+	bin, err := exec.LookPath(gccgo)
+	if err != nil {
+		return
+	}
+
+	allDirs, err := exec.Command(bin, "-print-search-dirs").Output()
+	if err != nil {
+		return
+	}
+	versionB, err := exec.Command(bin, "-dumpversion").Output()
+	if err != nil {
+		return
+	}
+	version := strings.TrimSpace(string(versionB))
+	machineB, err := exec.Command(bin, "-dumpmachine").Output()
+	if err != nil {
+		return
+	}
+	machine := strings.TrimSpace(string(machineB))
+
+	dirsEntries := strings.Split(string(allDirs), "\n")
+	const prefix = "libraries: ="
+	var dirs []string
+	for _, dirEntry := range dirsEntries {
+		if strings.HasPrefix(dirEntry, prefix) {
+			dirs = filepath.SplitList(strings.TrimPrefix(dirEntry, prefix))
+			break
+		}
+	}
+	if len(dirs) == 0 {
+		return
+	}
+
+	var lastDirs []string
+	for _, dir := range dirs {
+		goDir := filepath.Join(dir, "go", version)
+		if fi, err := os.Stat(goDir); err == nil && fi.IsDir() {
+			gd.dirs = append(gd.dirs, goDir)
+			goDir = filepath.Join(goDir, machine)
+			if fi, err = os.Stat(goDir); err == nil && fi.IsDir() {
+				gd.dirs = append(gd.dirs, goDir)
+			}
+		}
+		if fi, err := os.Stat(dir); err == nil && fi.IsDir() {
+			lastDirs = append(lastDirs, dir)
+		}
+	}
+	gd.dirs = append(gd.dirs, lastDirs...)
+}
+
+// isStandard reports whether path is a standard library for gccgo.
+func (gd *gccgoDirs) isStandard(path string) bool {
+	// Quick check: if the first path component has a '.', it's not
+	// in the standard library. This skips most GOPATH directories.
+	i := strings.Index(path, "/")
+	if i < 0 {
+		i = len(path)
+	}
+	if strings.Contains(path[:i], ".") {
+		return false
+	}
+
+	if path == "unsafe" {
+		// Special case.
+		return true
+	}
+
+	gd.once.Do(gd.init)
+	if gd.dirs == nil {
+		// We couldn't find the gccgo search directories.
+		// Best guess, since the first component did not contain
+		// '.', is that this is a standard library package.
+		return true
+	}
+
+	for _, dir := range gd.dirs {
+		full := filepath.Join(dir, path) + ".gox"
+		if fi, err := os.Stat(full); err == nil && !fi.IsDir() {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/src/internal/goroot/gccgo.go b/src/internal/goroot/gccgo.go
new file mode 100644
index 0000000..b1041da
--- /dev/null
+++ b/src/internal/goroot/gccgo.go
@@ -0,0 +1,28 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gccgo
+// +build gccgo
+
+package goroot
+
+import (
+	"os"
+	"path/filepath"
+)
+
+// IsStandardPackage reports whether path is a standard package,
+// given goroot and compiler.
+func IsStandardPackage(goroot, compiler, path string) bool {
+	switch compiler {
+	case "gc":
+		dir := filepath.Join(goroot, "src", path)
+		_, err := os.Stat(dir)
+		return err == nil
+	case "gccgo":
+		return stdpkg[path]
+	default:
+		panic("unknown compiler " + compiler)
+	}
+}
diff --git a/src/internal/goversion/goversion.go b/src/internal/goversion/goversion.go
new file mode 100644
index 0000000..4cc1568
--- /dev/null
+++ b/src/internal/goversion/goversion.go
@@ -0,0 +1,12 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package goversion
+
+// Version is the Go 1.x version which is currently
+// in development and will eventually get released.
+//
+// It should be updated at the start of each development cycle to be
+// the version of the next Go 1.x release. See golang.org/issue/40705.
+const Version = 17
diff --git a/src/internal/itoa/itoa.go b/src/internal/itoa/itoa.go
new file mode 100644
index 0000000..c6062d9
--- /dev/null
+++ b/src/internal/itoa/itoa.go
@@ -0,0 +1,33 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Simple conversions to avoid depending on strconv.
+
+package itoa
+
+// Itoa converts val to a decimal string.
+func Itoa(val int) string {
+	if val < 0 {
+		return "-" + Uitoa(uint(-val))
+	}
+	return Uitoa(uint(val))
+}
+
+// Uitoa converts val to a decimal string.
+func Uitoa(val uint) string {
+	if val == 0 { // avoid string allocation
+		return "0"
+	}
+	var buf [20]byte // big enough for 64bit value base 10
+	i := len(buf) - 1
+	for val >= 10 {
+		q := val / 10
+		buf[i] = byte('0' + val - q*10)
+		i--
+		val = q
+	}
+	// val < 10
+	buf[i] = byte('0' + val)
+	return string(buf[i:])
+}
diff --git a/src/internal/itoa/itoa_test.go b/src/internal/itoa/itoa_test.go
new file mode 100644
index 0000000..71931c1
--- /dev/null
+++ b/src/internal/itoa/itoa_test.go
@@ -0,0 +1,40 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package itoa_test
+
+import (
+	"fmt"
+	"internal/itoa"
+	"math"
+	"testing"
+)
+
+var (
+	minInt64  int64  = math.MinInt64
+	maxInt64  int64  = math.MaxInt64
+	maxUint64 uint64 = math.MaxUint64
+)
+
+func TestItoa(t *testing.T) {
+	tests := []int{int(minInt64), math.MinInt32, -999, -100, -1, 0, 1, 100, 999, math.MaxInt32, int(maxInt64)}
+	for _, tt := range tests {
+		got := itoa.Itoa(tt)
+		want := fmt.Sprint(tt)
+		if want != got {
+			t.Fatalf("Itoa(%d) = %s, want %s", tt, got, want)
+		}
+	}
+}
+
+func TestUitoa(t *testing.T) {
+	tests := []uint{0, 1, 100, 999, math.MaxUint32, uint(maxUint64)}
+	for _, tt := range tests {
+		got := itoa.Uitoa(tt)
+		want := fmt.Sprint(tt)
+		if want != got {
+			t.Fatalf("Uitoa(%d) = %s, want %s", tt, got, want)
+		}
+	}
+}
diff --git a/src/internal/lazyregexp/lazyre.go b/src/internal/lazyregexp/lazyre.go
new file mode 100644
index 0000000..2681af3
--- /dev/null
+++ b/src/internal/lazyregexp/lazyre.go
@@ -0,0 +1,78 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lazyregexp is a thin wrapper over regexp, allowing the use of global
+// regexp variables without forcing them to be compiled at init.
+package lazyregexp
+
+import (
+	"os"
+	"regexp"
+	"strings"
+	"sync"
+)
+
+// Regexp is a wrapper around regexp.Regexp, where the underlying regexp will be
+// compiled the first time it is needed.
+type Regexp struct {
+	str  string
+	once sync.Once
+	rx   *regexp.Regexp
+}
+
+func (r *Regexp) re() *regexp.Regexp {
+	r.once.Do(r.build)
+	return r.rx
+}
+
+func (r *Regexp) build() {
+	r.rx = regexp.MustCompile(r.str)
+	r.str = ""
+}
+
+func (r *Regexp) FindSubmatch(s []byte) [][]byte {
+	return r.re().FindSubmatch(s)
+}
+
+func (r *Regexp) FindStringSubmatch(s string) []string {
+	return r.re().FindStringSubmatch(s)
+}
+
+func (r *Regexp) FindStringSubmatchIndex(s string) []int {
+	return r.re().FindStringSubmatchIndex(s)
+}
+
+func (r *Regexp) ReplaceAllString(src, repl string) string {
+	return r.re().ReplaceAllString(src, repl)
+}
+
+func (r *Regexp) FindString(s string) string {
+	return r.re().FindString(s)
+}
+
+func (r *Regexp) FindAllString(s string, n int) []string {
+	return r.re().FindAllString(s, n)
+}
+
+func (r *Regexp) MatchString(s string) bool {
+	return r.re().MatchString(s)
+}
+
+func (r *Regexp) SubexpNames() []string {
+	return r.re().SubexpNames()
+}
+
+var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")
+
+// New creates a new lazy regexp, delaying the compiling work until it is first
+// needed. If the code is being run as part of tests, the regexp compiling will
+// happen immediately.
+func New(str string) *Regexp {
+	lr := &Regexp{str: str}
+	if inTest {
+		// In tests, always compile the regexps early.
+		lr.re()
+	}
+	return lr
+}
diff --git a/src/internal/lazytemplate/lazytemplate.go b/src/internal/lazytemplate/lazytemplate.go
new file mode 100644
index 0000000..c83eaea
--- /dev/null
+++ b/src/internal/lazytemplate/lazytemplate.go
@@ -0,0 +1,52 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lazytemplate is a thin wrapper over text/template, allowing the use
+// of global template variables without forcing them to be parsed at init.
+package lazytemplate
+
+import (
+	"io"
+	"os"
+	"strings"
+	"sync"
+	"text/template"
+)
+
+// Template is a wrapper around text/template.Template, where the underlying
+// template will be parsed the first time it is needed.
+type Template struct {
+	name, text string
+
+	once sync.Once
+	tmpl *template.Template
+}
+
+func (r *Template) tp() *template.Template {
+	r.once.Do(r.build)
+	return r.tmpl
+}
+
+func (r *Template) build() {
+	r.tmpl = template.Must(template.New(r.name).Parse(r.text))
+	r.name, r.text = "", ""
+}
+
+func (r *Template) Execute(w io.Writer, data interface{}) error {
+	return r.tp().Execute(w, data)
+}
+
+var inTest = len(os.Args) > 0 && strings.HasSuffix(strings.TrimSuffix(os.Args[0], ".exe"), ".test")
+
+// New creates a new lazy template, delaying the parsing work until it is first
+// needed. If the code is being run as part of tests, the template parsing will
+// happen immediately.
+func New(name, text string) *Template {
+	lt := &Template{name: name, text: text}
+	if inTest {
+		// In tests, always parse the templates early.
+		lt.tp()
+	}
+	return lt
+}
diff --git a/src/internal/nettrace/nettrace.go b/src/internal/nettrace/nettrace.go
new file mode 100644
index 0000000..de3254d
--- /dev/null
+++ b/src/internal/nettrace/nettrace.go
@@ -0,0 +1,45 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package nettrace contains internal hooks for tracing activity in
+// the net package. This package is purely internal for use by the
+// net/http/httptrace package and has no stable API exposed to end
+// users.
+package nettrace
+
+// TraceKey is a context.Context Value key. Its associated value should
+// be a *Trace struct.
+type TraceKey struct{}
+
+// LookupIPAltResolverKey is a context.Context Value key used by tests to
+// specify an alternate resolver func.
+// It is not exposed to outside users. (But see issue 12503)
+// The value should be the same type as lookupIP:
+//     func lookupIP(ctx context.Context, host string) ([]IPAddr, error)
+type LookupIPAltResolverKey struct{}
+
+// Trace contains a set of hooks for tracing events within
+// the net package. Any specific hook may be nil.
+type Trace struct {
+	// DNSStart is called with the hostname of a DNS lookup
+	// before it begins.
+	DNSStart func(name string)
+
+	// DNSDone is called after a DNS lookup completes (or fails).
+	// The coalesced parameter is whether singleflight de-dupped
+	// the call. The netIPs are of type net.IPAddr but can't be
+	// declared as such for circular dependency reasons.
+	DNSDone func(netIPs []interface{}, coalesced bool, err error)
+
+	// ConnectStart is called before a Dial, excluding Dials made
+	// during DNS lookups. In the case of DualStack (Happy Eyeballs)
+	// dialing, this may be called multiple times, from multiple
+	// goroutines.
+	ConnectStart func(network, addr string)
+
+	// ConnectDone is called after a Dial with the results, excluding
+	// Dials made during DNS lookups. It may also be called multiple
+	// times, like ConnectStart.
+	ConnectDone func(network, addr string, err error)
+}
diff --git a/src/internal/obscuretestdata/obscuretestdata.go b/src/internal/obscuretestdata/obscuretestdata.go
new file mode 100644
index 0000000..5ea2cdf
--- /dev/null
+++ b/src/internal/obscuretestdata/obscuretestdata.go
@@ -0,0 +1,50 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package obscuretestdata contains functionality used by tests to more easily
+// work with testdata that must be obscured primarily due to
+// golang.org/issue/34986.
+package obscuretestdata
+
+import (
+	"encoding/base64"
+	"io"
+	"os"
+)
+
+// DecodeToTempFile decodes the named file to a temporary location.
+// If successful, it returns the path of the decoded file.
+// The caller is responsible for ensuring that the temporary file is removed.
+func DecodeToTempFile(name string) (path string, err error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	tmp, err := os.CreateTemp("", "obscuretestdata-decoded-")
+	if err != nil {
+		return "", err
+	}
+	if _, err := io.Copy(tmp, base64.NewDecoder(base64.StdEncoding, f)); err != nil {
+		tmp.Close()
+		os.Remove(tmp.Name())
+		return "", err
+	}
+	if err := tmp.Close(); err != nil {
+		os.Remove(tmp.Name())
+		return "", err
+	}
+	return tmp.Name(), nil
+}
+
+// ReadFile reads the named file and returns its decoded contents.
+func ReadFile(name string) ([]byte, error) {
+	f, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	return io.ReadAll(base64.NewDecoder(base64.StdEncoding, f))
+}
diff --git a/src/internal/oserror/errors.go b/src/internal/oserror/errors.go
new file mode 100644
index 0000000..28a1ab3
--- /dev/null
+++ b/src/internal/oserror/errors.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package oserror defines error values used in the os package.
+//
+// These types are defined here to permit the syscall package to reference them.
+package oserror
+
+import "errors"
+
+var (
+	ErrInvalid    = errors.New("invalid argument")
+	ErrPermission = errors.New("permission denied")
+	ErrExist      = errors.New("file already exists")
+	ErrNotExist   = errors.New("file does not exist")
+	ErrClosed     = errors.New("file already closed")
+)
diff --git a/src/internal/poll/copy_file_range_linux.go b/src/internal/poll/copy_file_range_linux.go
new file mode 100644
index 0000000..5b9e5d4
--- /dev/null
+++ b/src/internal/poll/copy_file_range_linux.go
@@ -0,0 +1,164 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"sync/atomic"
+	"syscall"
+)
+
+var copyFileRangeSupported int32 = -1 // accessed atomically
+
+const maxCopyFileRangeRound = 1 << 30
+
+func kernelVersion() (major int, minor int) {
+	var uname syscall.Utsname
+	if err := syscall.Uname(&uname); err != nil {
+		return
+	}
+
+	rl := uname.Release
+	var values [2]int
+	vi := 0
+	value := 0
+	for _, c := range rl {
+		if '0' <= c && c <= '9' {
+			value = (value * 10) + int(c-'0')
+		} else {
+			// Note that we're assuming N.N.N here.  If we see anything else we are likely to
+			// mis-parse it.
+			values[vi] = value
+			vi++
+			if vi >= len(values) {
+				break
+			}
+			value = 0
+		}
+	}
+	switch vi {
+	case 0:
+		return 0, 0
+	case 1:
+		return values[0], 0
+	case 2:
+		return values[0], values[1]
+	}
+	return
+}
+
+// CopyFileRange copies at most remain bytes of data from src to dst, using
+// the copy_file_range system call. dst and src must refer to regular files.
+func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) {
+	if supported := atomic.LoadInt32(&copyFileRangeSupported); supported == 0 {
+		return 0, false, nil
+	} else if supported == -1 {
+		major, minor := kernelVersion()
+		if major > 5 || (major == 5 && minor >= 3) {
+			atomic.StoreInt32(&copyFileRangeSupported, 1)
+		} else {
+			// copy_file_range(2) is broken in various ways on kernels older than 5.3,
+			// see issue #42400 and
+			// https://man7.org/linux/man-pages/man2/copy_file_range.2.html#VERSIONS
+			atomic.StoreInt32(&copyFileRangeSupported, 0)
+			return 0, false, nil
+		}
+	}
+	for remain > 0 {
+		max := remain
+		if max > maxCopyFileRangeRound {
+			max = maxCopyFileRangeRound
+		}
+		n, err := copyFileRange(dst, src, int(max))
+		switch err {
+		case syscall.ENOSYS:
+			// copy_file_range(2) was introduced in Linux 4.5.
+			// Go supports Linux >= 2.6.33, so the system call
+			// may not be present.
+			//
+			// If we see ENOSYS, we have certainly not transferred
+			// any data, so we can tell the caller that we
+			// couldn't handle the transfer and let them fall
+			// back to more generic code.
+			//
+			// Seeing ENOSYS also means that we will not try to
+			// use copy_file_range(2) again.
+			atomic.StoreInt32(&copyFileRangeSupported, 0)
+			return 0, false, nil
+		case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM:
+			// Prior to Linux 5.3, it was not possible to
+			// copy_file_range across file systems. Similarly to
+			// the ENOSYS case above, if we see EXDEV, we have
+			// not transferred any data, and we can let the caller
+			// fall back to generic code.
+			//
+			// As for EINVAL, that is what we see if, for example,
+			// dst or src refer to a pipe rather than a regular
+			// file. This is another case where no data has been
+			// transferred, so we consider it unhandled.
+			//
+			// If src and dst are on CIFS, we can see EIO.
+			// See issue #42334.
+			//
+			// If the file is on NFS, we can see EOPNOTSUPP.
+			// See issue #40731.
+			//
+			// If the process is running inside a Docker container,
+			// we might see EPERM instead of ENOSYS. See issue
+			// #40893. Since EPERM might also be a legitimate error,
+			// don't mark copy_file_range(2) as unsupported.
+			return 0, false, nil
+		case nil:
+			if n == 0 {
+				// If we did not read any bytes at all,
+				// then this file may be in a file system
+				// where copy_file_range silently fails.
+				// https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0
+				if written == 0 {
+					return 0, false, nil
+				}
+				// Otherwise src is at EOF, which means
+				// we are done.
+				return written, true, nil
+			}
+			remain -= n
+			written += n
+		default:
+			return written, true, err
+		}
+	}
+	return written, true, nil
+}
+
+// copyFileRange performs one round of copy_file_range(2).
+func copyFileRange(dst, src *FD, max int) (written int64, err error) {
+	// The signature of copy_file_range(2) is:
+	//
+	// ssize_t copy_file_range(int fd_in, loff_t *off_in,
+	//                         int fd_out, loff_t *off_out,
+	//                         size_t len, unsigned int flags);
+	//
+	// Note that in the call to unix.CopyFileRange below, we use nil
+	// values for off_in and off_out. For the system call, this means
+	// "use and update the file offsets". That is why we must acquire
+	// locks for both file descriptors (and why this whole machinery is
+	// in the internal/poll package to begin with).
+	if err := dst.writeLock(); err != nil {
+		return 0, err
+	}
+	defer dst.writeUnlock()
+	if err := src.readLock(); err != nil {
+		return 0, err
+	}
+	defer src.readUnlock()
+	var n int
+	for {
+		n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0)
+		if err != syscall.EINTR {
+			break
+		}
+	}
+	return int64(n), err
+}
diff --git a/src/internal/poll/errno_unix.go b/src/internal/poll/errno_unix.go
new file mode 100644
index 0000000..55c5488
--- /dev/null
+++ b/src/internal/poll/errno_unix.go
@@ -0,0 +1,34 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package poll
+
+import "syscall"
+
+// Do the interface allocations only once for common
+// Errno values.
+var (
+	errEAGAIN error = syscall.EAGAIN
+	errEINVAL error = syscall.EINVAL
+	errENOENT error = syscall.ENOENT
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0:
+		return nil
+	case syscall.EAGAIN:
+		return errEAGAIN
+	case syscall.EINVAL:
+		return errEINVAL
+	case syscall.ENOENT:
+		return errENOENT
+	}
+	return e
+}
diff --git a/src/internal/poll/errno_windows.go b/src/internal/poll/errno_windows.go
new file mode 100644
index 0000000..c55f5f0
--- /dev/null
+++ b/src/internal/poll/errno_windows.go
@@ -0,0 +1,32 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build windows
+// +build windows
+
+package poll
+
+import "syscall"
+
+// Do the interface allocations only once for common
+// Errno values.
+
+var (
+	errERROR_IO_PENDING error = syscall.Errno(syscall.ERROR_IO_PENDING)
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0:
+		return nil
+	case syscall.ERROR_IO_PENDING:
+		return errERROR_IO_PENDING
+	}
+	// TODO: add more here, after collecting data on the common
+	// error values seen on Windows. (perhaps when running
+	// all.bat?)
+	return e
+}
diff --git a/src/internal/poll/error_linux_test.go b/src/internal/poll/error_linux_test.go
new file mode 100644
index 0000000..059fb8e
--- /dev/null
+++ b/src/internal/poll/error_linux_test.go
@@ -0,0 +1,31 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"errors"
+	"internal/poll"
+	"os"
+	"syscall"
+)
+
+func badStateFile() (*os.File, error) {
+	if os.Getuid() != 0 {
+		return nil, errors.New("must be root")
+	}
+	// Using OpenFile for a device file is an easy way to make a
+	// file attached to the runtime-integrated network poller but
+	// left only halfway configured.
+	return os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
+}
+
+func isBadStateFileError(err error) (string, bool) {
+	switch err {
+	case poll.ErrNotPollable, syscall.EBADFD:
+		return "", true
+	default:
+		return "not pollable or file in bad state error", false
+	}
+}
diff --git a/src/internal/poll/error_stub_test.go b/src/internal/poll/error_stub_test.go
new file mode 100644
index 0000000..bcc25dd
--- /dev/null
+++ b/src/internal/poll/error_stub_test.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux
+// +build !linux
+
+package poll_test
+
+import (
+	"errors"
+	"os"
+	"runtime"
+)
+
+func badStateFile() (*os.File, error) {
+	return nil, errors.New("not supported on " + runtime.GOOS)
+}
+
+func isBadStateFileError(err error) (string, bool) {
+	return "", false
+}
diff --git a/src/internal/poll/error_test.go b/src/internal/poll/error_test.go
new file mode 100644
index 0000000..abc8b16
--- /dev/null
+++ b/src/internal/poll/error_test.go
@@ -0,0 +1,51 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"fmt"
+	"io/fs"
+	"net"
+	"os"
+	"testing"
+	"time"
+)
+
+func TestReadError(t *testing.T) {
+	t.Run("ErrNotPollable", func(t *testing.T) {
+		f, err := badStateFile()
+		if err != nil {
+			t.Skip(err)
+		}
+		defer f.Close()
+
+		// Give the scheduler a chance to run two separate
+		// goroutines: an event poller and an event waiter.
+		time.Sleep(100 * time.Millisecond)
+
+		var b [1]byte
+		_, err = f.Read(b[:])
+		if perr := parseReadError(err, isBadStateFileError); perr != nil {
+			t.Fatal(perr)
+		}
+	})
+}
+
+func parseReadError(nestedErr error, verify func(error) (string, bool)) error {
+	err := nestedErr
+	if nerr, ok := err.(*net.OpError); ok {
+		err = nerr.Err
+	}
+	if nerr, ok := err.(*fs.PathError); ok {
+		err = nerr.Err
+	}
+	if nerr, ok := err.(*os.SyscallError); ok {
+		err = nerr.Err
+	}
+	if s, ok := verify(err); !ok {
+		return fmt.Errorf("got %v; want %s", nestedErr, s)
+	}
+	return nil
+}
diff --git a/src/internal/poll/export_linux_test.go b/src/internal/poll/export_linux_test.go
new file mode 100644
index 0000000..7fba793
--- /dev/null
+++ b/src/internal/poll/export_linux_test.go
@@ -0,0 +1,22 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing on linux.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+var (
+	GetPipe     = getPipe
+	PutPipe     = putPipe
+	NewPipe     = newPipe
+	DestroyPipe = destroyPipe
+)
+
+func GetPipeFds(p *SplicePipe) (int, int) {
+	return p.rfd, p.wfd
+}
+
+type SplicePipe = splicePipe
diff --git a/src/internal/poll/export_posix_test.go b/src/internal/poll/export_posix_test.go
new file mode 100644
index 0000000..f59c1f6
--- /dev/null
+++ b/src/internal/poll/export_posix_test.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows
+
+// Export guts for testing on posix.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+func (fd *FD) EOFError(n int, err error) error {
+	return fd.eofError(n, err)
+}
diff --git a/src/internal/poll/export_test.go b/src/internal/poll/export_test.go
new file mode 100644
index 0000000..02664d9
--- /dev/null
+++ b/src/internal/poll/export_test.go
@@ -0,0 +1,35 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+var Consume = consume
+
+type FDMutex struct {
+	fdMutex
+}
+
+func (mu *FDMutex) Incref() bool {
+	return mu.incref()
+}
+
+func (mu *FDMutex) IncrefAndClose() bool {
+	return mu.increfAndClose()
+}
+
+func (mu *FDMutex) Decref() bool {
+	return mu.decref()
+}
+
+func (mu *FDMutex) RWLock(read bool) bool {
+	return mu.rwlock(read)
+}
+
+func (mu *FDMutex) RWUnlock(read bool) bool {
+	return mu.rwunlock(read)
+}
diff --git a/src/internal/poll/export_windows_test.go b/src/internal/poll/export_windows_test.go
new file mode 100644
index 0000000..88ed71a
--- /dev/null
+++ b/src/internal/poll/export_windows_test.go
@@ -0,0 +1,17 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Export guts for testing on windows.
+// Since testing imports os and os imports internal/poll,
+// the internal/poll tests can not be in package poll.
+
+package poll
+
+var (
+	LogInitFD = &logInitFD
+)
+
+func (fd *FD) IsPartOfNetpoll() bool {
+	return fd.pd.runtimeCtx != 0
+}
diff --git a/src/internal/poll/fcntl_js.go b/src/internal/poll/fcntl_js.go
new file mode 100644
index 0000000..7bf0ddc
--- /dev/null
+++ b/src/internal/poll/fcntl_js.go
@@ -0,0 +1,15 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build js && wasm
+// +build js,wasm
+
+package poll
+
+import "syscall"
+
+// fcntl not supported on js/wasm
+func fcntl(fd int, cmd int, arg int) (int, error) {
+	return 0, syscall.ENOSYS
+}
diff --git a/src/internal/poll/fcntl_libc.go b/src/internal/poll/fcntl_libc.go
new file mode 100644
index 0000000..cc609e4
--- /dev/null
+++ b/src/internal/poll/fcntl_libc.go
@@ -0,0 +1,14 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || solaris
+// +build aix darwin solaris
+
+package poll
+
+import _ "unsafe" // for go:linkname
+
+// Implemented in the syscall package.
+//go:linkname fcntl syscall.fcntl
+func fcntl(fd int, cmd int, arg int) (int, error)
diff --git a/src/internal/poll/fcntl_syscall.go b/src/internal/poll/fcntl_syscall.go
new file mode 100644
index 0000000..8db5b66
--- /dev/null
+++ b/src/internal/poll/fcntl_syscall.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || linux || netbsd || openbsd
+// +build dragonfly freebsd linux netbsd openbsd
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"syscall"
+)
+
+// fcntl issues the fcntl system call on fd with the given command and
+// argument. It returns the raw result together with the errno as an
+// error, or a nil error when the errno is zero.
+func fcntl(fd int, cmd int, arg int) (int, error) {
+	res, _, errno := syscall.Syscall(unix.FcntlSyscall, uintptr(fd), uintptr(cmd), uintptr(arg))
+	var err error
+	if errno != 0 {
+		err = errno
+	}
+	return int(res), err
+}
diff --git a/src/internal/poll/fd.go b/src/internal/poll/fd.go
new file mode 100644
index 0000000..69a9005
--- /dev/null
+++ b/src/internal/poll/fd.go
@@ -0,0 +1,82 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package poll supports non-blocking I/O on file descriptors with polling.
+// This supports I/O operations that block only a goroutine, not a thread.
+// This is used by the net and os packages.
+// It uses a poller built into the runtime, with support from the
+// runtime scheduler.
+package poll
+
+import (
+	"errors"
+)
+
+// errNetClosing is the type of the variable ErrNetClosing.
+// Its Error, Timeout and Temporary methods are used to implement
+// the net.Error interface.
+type errNetClosing struct{}
+
+// Error returns the error message for ErrNetClosing.
+// Keep this string consistent because of issue #4373:
+// since historically programs have not been able to detect
+// this error, they look for the string.
+func (e errNetClosing) Error() string { return "use of closed network connection" }
+
+// Timeout and Temporary always report false.
+func (e errNetClosing) Timeout() bool   { return false }
+func (e errNetClosing) Temporary() bool { return false }
+
+// ErrNetClosing is returned when a network descriptor is used after
+// it has been closed.
+var ErrNetClosing = errNetClosing{}
+
+// ErrFileClosing is returned when a file descriptor is used after it
+// has been closed.
+var ErrFileClosing = errors.New("use of closed file")
+
+// ErrNoDeadline is returned when a request is made to set a deadline
+// on a file type that does not use the poller.
+var ErrNoDeadline = errors.New("file type does not support deadline")
+
+// errClosing returns the error for an operation on a closed
+// descriptor: ErrFileClosing when isFile is true, ErrNetClosing
+// otherwise.
+func errClosing(isFile bool) error {
+	if !isFile {
+		return ErrNetClosing
+	}
+	return ErrFileClosing
+}
+
+// ErrDeadlineExceeded is returned for an expired deadline.
+// This is exported by the os package as os.ErrDeadlineExceeded.
+var ErrDeadlineExceeded error = &DeadlineExceededError{}
+
+// DeadlineExceededError is returned for an expired deadline.
+type DeadlineExceededError struct{}
+
+// Error, Timeout and Temporary implement the net.Error interface.
+// The string is "i/o timeout" because that is what was returned
+// by earlier Go versions. Changing it may break programs that
+// match on error strings.
+// Timeout and Temporary both always report true.
+func (e *DeadlineExceededError) Error() string   { return "i/o timeout" }
+func (e *DeadlineExceededError) Timeout() bool   { return true }
+func (e *DeadlineExceededError) Temporary() bool { return true }
+
+// ErrNotPollable is returned when the file or socket is not suitable
+// for event notification.
+var ErrNotPollable = errors.New("not pollable")
+
+// consume removes the first n bytes from the slice of byte slices *v,
+// for writev. Slices that are used up entirely are dropped from the
+// front of *v; the first slice that is only partly used is re-sliced
+// in place to its remaining bytes.
+func consume(v *[][]byte, n int64) {
+	bufs := *v
+	// Drop every leading slice that n covers completely.
+	for len(bufs) > 0 && int64(len(bufs[0])) <= n {
+		n -= int64(len(bufs[0]))
+		bufs = bufs[1:]
+	}
+	// Whatever is now first is longer than n: trim its consumed prefix.
+	if len(bufs) > 0 {
+		bufs[0] = bufs[0][n:]
+	}
+	*v = bufs
+}
+
+// TestHookDidWritev is a hook for testing writev.
+var TestHookDidWritev = func(wrote int) {}
diff --git a/src/internal/poll/fd_fsync_darwin.go b/src/internal/poll/fd_fsync_darwin.go
new file mode 100644
index 0000000..48e7596
--- /dev/null
+++ b/src/internal/poll/fd_fsync_darwin.go
@@ -0,0 +1,21 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// Fsync issues fcntl with the F_FULLFSYNC command because
+// on OS X, SYS_FSYNC doesn't fully flush contents to disk.
+// See Issue #26650 as well as the man page for fsync on OS X.
+// The call is repeated for as long as it is interrupted by EINTR.
+func (fd *FD) Fsync() error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	fullSync := func() error {
+		_, err := fcntl(fd.Sysfd, syscall.F_FULLFSYNC, 0)
+		return err
+	}
+	return ignoringEINTR(fullSync)
+}
diff --git a/src/internal/poll/fd_fsync_posix.go b/src/internal/poll/fd_fsync_posix.go
new file mode 100644
index 0000000..651a5ec
--- /dev/null
+++ b/src/internal/poll/fd_fsync_posix.go
@@ -0,0 +1,21 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris
+// +build aix dragonfly freebsd js,wasm linux netbsd openbsd solaris
+
+package poll
+
+import "syscall"
+
+// Fsync wraps syscall.Fsync, holding a reference to fd for the
+// duration of the call and retrying while the call reports EINTR.
+func (fd *FD) Fsync() error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	flush := func() error { return syscall.Fsync(fd.Sysfd) }
+	return ignoringEINTR(flush)
+}
diff --git a/src/internal/poll/fd_fsync_windows.go b/src/internal/poll/fd_fsync_windows.go
new file mode 100644
index 0000000..fb12119
--- /dev/null
+++ b/src/internal/poll/fd_fsync_windows.go
@@ -0,0 +1,16 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// Fsync wraps syscall.Fsync, holding a reference to fd for the
+// duration of the call.
+func (fd *FD) Fsync() error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Fsync(fd.Sysfd)
+}
diff --git a/src/internal/poll/fd_io_plan9.go b/src/internal/poll/fd_io_plan9.go
new file mode 100644
index 0000000..3205ac8
--- /dev/null
+++ b/src/internal/poll/fd_io_plan9.go
@@ -0,0 +1,92 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/itoa"
+	"runtime"
+	"sync"
+	"syscall"
+)
+
+// asyncIO implements asynchronous cancelable I/O.
+// An asyncIO represents a single asynchronous Read or Write
+// operation. The result is returned on the result channel.
+// The in-flight I/O system call can either complete or be
+// interrupted by a note.
+type asyncIO struct {
+	// res carries the outcome of the operation to Wait.
+	res chan result
+
+	// mu guards the pid field.
+	mu sync.Mutex
+
+	// pid holds the process id of
+	// the process running the IO operation.
+	// It is set to -1 once the operation has returned.
+	pid int
+}
+
+// result is the return value of a Read or Write operation:
+// the two values returned by the function passed to newAsyncIO.
+type result struct {
+	n   int
+	err error
+}
+
+// newAsyncIO returns a new asyncIO that performs an I/O
+// operation by calling fn, which must do one and only one
+// interruptible system call.
+//
+// fn is called with b on a new goroutine. Its results are sent on
+// the res channel, which is unbuffered, so that goroutine blocks
+// until Wait receives them.
+func newAsyncIO(fn func([]byte) (int, error), b []byte) *asyncIO {
+	aio := &asyncIO{
+		res: make(chan result, 0),
+	}
+	// Hold mu until the goroutine has recorded its pid, so that a
+	// concurrent Cancel cannot read pid before it has been set.
+	aio.mu.Lock()
+	go func() {
+		// Lock the current goroutine to its process
+		// and store the pid in aio so that Cancel can
+		// interrupt it. We ignore the "hangup" signal,
+		// so the signal does not take down the entire
+		// Go runtime.
+		runtime.LockOSThread()
+		runtime_ignoreHangup()
+		aio.pid = syscall.Getpid()
+		aio.mu.Unlock()
+
+		n, err := fn(b)
+
+		// Mark the operation as finished (pid == -1) before
+		// publishing the result, so that a later Cancel does nothing.
+		aio.mu.Lock()
+		aio.pid = -1
+		runtime_unignoreHangup()
+		aio.mu.Unlock()
+
+		aio.res <- result{n, err}
+	}()
+	return aio
+}
+
+// Cancel interrupts the I/O operation, causing
+// the Wait function to return.
+//
+// It does nothing if the operation has already returned (pid == -1).
+// Otherwise it writes the note "hangup" to /proc/<pid>/note. A failure
+// to open that file is silently ignored, as are the results of the
+// write and the close.
+func (aio *asyncIO) Cancel() {
+	aio.mu.Lock()
+	defer aio.mu.Unlock()
+	if aio.pid == -1 {
+		return
+	}
+	f, e := syscall.Open("/proc/"+itoa.Itoa(aio.pid)+"/note", syscall.O_WRONLY)
+	if e != nil {
+		return
+	}
+	syscall.Write(f, []byte("hangup"))
+	syscall.Close(f)
+}
+
+// Wait blocks until the I/O operation delivers its result on the
+// res channel, then returns that result.
+func (aio *asyncIO) Wait() (int, error) {
+	r := <-aio.res
+	return r.n, r.err
+}
+
+// The following functions, provided by the runtime, are used to
+// ignore and unignore the "hangup" signal received by the process.
+func runtime_ignoreHangup()
+func runtime_unignoreHangup()
diff --git a/src/internal/poll/fd_mutex.go b/src/internal/poll/fd_mutex.go
new file mode 100644
index 0000000..0a8ee6f
--- /dev/null
+++ b/src/internal/poll/fd_mutex.go
@@ -0,0 +1,252 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "sync/atomic"
+
+// fdMutex is a specialized synchronization primitive that manages
+// lifetime of an fd and serializes access to Read, Write and Close
+// methods on FD.
+//
+// state packs the closed flag, the read and write lock bits, the
+// reference count and the two waiter counts into a single word that
+// is only updated with atomic operations. rsema and wsema are the
+// semaphores on which blocked readers and writers wait.
+type fdMutex struct {
+	state uint64
+	rsema uint32
+	wsema uint32
+}
+
+// fdMutex.state is organized as follows:
+// 1 bit - whether FD is closed, if set all subsequent lock operations will fail.
+// 1 bit - lock for read operations.
+// 1 bit - lock for write operations.
+// 20 bits - total number of references (read+write+misc).
+// 20 bits - number of outstanding read waiters.
+// 20 bits - number of outstanding write waiters.
+const (
+	mutexClosed  = 1 << 0
+	mutexRLock   = 1 << 1
+	mutexWLock   = 1 << 2
+	mutexRef     = 1 << 3
+	mutexRefMask = (1<<20 - 1) << 3
+	mutexRWait   = 1 << 23
+	mutexRMask   = (1<<20 - 1) << 23
+	mutexWWait   = 1 << 43
+	mutexWMask   = (1<<20 - 1) << 43
+)
+
+const overflowMsg = "too many concurrent operations on a single file or socket (max 1048575)"
+
+// Read operations must do rwlock(true)/rwunlock(true).
+//
+// Write operations must do rwlock(false)/rwunlock(false).
+//
+// Misc operations must do incref/decref.
+// Misc operations include functions like setsockopt and setDeadline.
+// They need to use incref/decref to ensure that they operate on the
+// correct fd in presence of a concurrent close call (otherwise fd can
+// be closed under their feet).
+//
+// Close operations must do increfAndClose/decref.
+
+// incref adds a reference to mu.
+// It reports whether mu is available for reading or writing:
+// it returns false, without adding a reference, if mu is closed.
+// It panics with overflowMsg if the reference count would overflow.
+func (mu *fdMutex) incref() bool {
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexClosed != 0 {
+			return false
+		}
+		new := old + mutexRef
+		if new&mutexRefMask == 0 {
+			// The reference field wrapped around to zero.
+			panic(overflowMsg)
+		}
+		// If state changed since it was loaded, start over.
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			return true
+		}
+	}
+}
+
+// increfAndClose sets the state of mu to closed.
+// It returns false if the file was already closed.
+// On success it also adds a reference, clears the read and write
+// waiter counts, and releases the semaphore once for every waiter
+// that was counted.
+func (mu *fdMutex) increfAndClose() bool {
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexClosed != 0 {
+			return false
+		}
+		// Mark as closed and acquire a reference.
+		new := (old | mutexClosed) + mutexRef
+		if new&mutexRefMask == 0 {
+			panic(overflowMsg)
+		}
+		// Remove all read and write waiters.
+		new &^= mutexRMask | mutexWMask
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			// Wake all read and write waiters,
+			// they will observe closed flag after wakeup.
+			// old still holds the waiter counts from before the swap;
+			// it is used here as a countdown.
+			for old&mutexRMask != 0 {
+				old -= mutexRWait
+				runtime_Semrelease(&mu.rsema)
+			}
+			for old&mutexWMask != 0 {
+				old -= mutexWWait
+				runtime_Semrelease(&mu.wsema)
+			}
+			return true
+		}
+	}
+}
+
+// decref removes a reference from mu.
+// It reports whether there is no remaining reference on a closed mu,
+// that is, whether the closed flag is set and the reference count has
+// dropped to zero.
+// It panics if the reference count is already zero.
+func (mu *fdMutex) decref() bool {
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexRefMask == 0 {
+			panic("inconsistent poll.fdMutex")
+		}
+		new := old - mutexRef
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			return new&(mutexClosed|mutexRefMask) == mutexClosed
+		}
+	}
+}
+
+// rwlock adds a reference to mu and locks mu, for reading if read is
+// true and for writing otherwise.
+// It reports whether mu is available for reading or writing:
+// it returns false if mu is closed, including when it is closed
+// while the caller is waiting for the lock.
+func (mu *fdMutex) rwlock(read bool) bool {
+	var mutexBit, mutexWait, mutexMask uint64
+	var mutexSema *uint32
+	if read {
+		mutexBit = mutexRLock
+		mutexWait = mutexRWait
+		mutexMask = mutexRMask
+		mutexSema = &mu.rsema
+	} else {
+		mutexBit = mutexWLock
+		mutexWait = mutexWWait
+		mutexMask = mutexWMask
+		mutexSema = &mu.wsema
+	}
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexClosed != 0 {
+			return false
+		}
+		var new uint64
+		if old&mutexBit == 0 {
+			// Lock is free, acquire it.
+			new = (old | mutexBit) + mutexRef
+			if new&mutexRefMask == 0 {
+				panic(overflowMsg)
+			}
+		} else {
+			// Wait for lock.
+			new = old + mutexWait
+			if new&mutexMask == 0 {
+				panic(overflowMsg)
+			}
+		}
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			if old&mutexBit == 0 {
+				return true
+			}
+			runtime_Semacquire(mutexSema)
+			// The signaller has subtracted mutexWait.
+			// Loop around and try to take the lock again.
+		}
+	}
+}
+
+// rwunlock removes a reference from mu and unlocks mu, for reading if
+// read is true and for writing otherwise.
+// It reports whether there is no remaining reference on a closed mu.
+// It panics if the selected lock is not held or the reference count
+// is zero.
+func (mu *fdMutex) rwunlock(read bool) bool {
+	var mutexBit, mutexWait, mutexMask uint64
+	var mutexSema *uint32
+	if read {
+		mutexBit = mutexRLock
+		mutexWait = mutexRWait
+		mutexMask = mutexRMask
+		mutexSema = &mu.rsema
+	} else {
+		mutexBit = mutexWLock
+		mutexWait = mutexWWait
+		mutexMask = mutexWMask
+		mutexSema = &mu.wsema
+	}
+	for {
+		old := atomic.LoadUint64(&mu.state)
+		if old&mutexBit == 0 || old&mutexRefMask == 0 {
+			panic("inconsistent poll.fdMutex")
+		}
+		// Drop lock, drop reference and wake one waiter of the
+		// same kind (reader or writer) if present.
+		new := (old &^ mutexBit) - mutexRef
+		if old&mutexMask != 0 {
+			new -= mutexWait
+		}
+		if atomic.CompareAndSwapUint64(&mu.state, old, new) {
+			if old&mutexMask != 0 {
+				runtime_Semrelease(mutexSema)
+			}
+			return new&(mutexClosed|mutexRefMask) == mutexClosed
+		}
+	}
+}
+
+// Implemented in runtime package.
+func runtime_Semacquire(sema *uint32)
+func runtime_Semrelease(sema *uint32)
+
+// incref adds a reference to fd.
+// It returns the closing error for fd when a reference cannot be
+// taken, and nil otherwise.
+func (fd *FD) incref() error {
+	if fd.fdmu.incref() {
+		return nil
+	}
+	return errClosing(fd.isFile)
+}
+
+// decref removes a reference from fd.
+// When that was the last reference and fd has been closed, it
+// destroys fd and returns the result of destroy.
+func (fd *FD) decref() error {
+	if last := fd.fdmu.decref(); !last {
+		return nil
+	}
+	return fd.destroy()
+}
+
+// readLock adds a reference to fd and locks fd for reading.
+// It returns the closing error for fd when fd cannot be locked.
+func (fd *FD) readLock() error {
+	if fd.fdmu.rwlock(true) {
+		return nil
+	}
+	return errClosing(fd.isFile)
+}
+
+// readUnlock removes a reference from fd and releases the read lock.
+// When that was the last reference and fd has been closed, it
+// destroys fd.
+func (fd *FD) readUnlock() {
+	if last := fd.fdmu.rwunlock(true); last {
+		fd.destroy()
+	}
+}
+
+// writeLock adds a reference to fd and locks fd for writing.
+// It returns the closing error for fd when fd cannot be locked.
+func (fd *FD) writeLock() error {
+	if fd.fdmu.rwlock(false) {
+		return nil
+	}
+	return errClosing(fd.isFile)
+}
+
+// writeUnlock removes a reference from fd and releases the write lock.
+// When that was the last reference and fd has been closed, it
+// destroys fd.
+func (fd *FD) writeUnlock() {
+	if last := fd.fdmu.rwunlock(false); last {
+		fd.destroy()
+	}
+}
diff --git a/src/internal/poll/fd_mutex_test.go b/src/internal/poll/fd_mutex_test.go
new file mode 100644
index 0000000..3029b9a
--- /dev/null
+++ b/src/internal/poll/fd_mutex_test.go
@@ -0,0 +1,222 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	. "internal/poll"
+	"math/rand"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+)
+
+// TestMutexLock checks that on a fresh FDMutex each of Incref,
+// RWLock(true) and RWLock(false) succeeds, and that the matching
+// release call returns false.
+func TestMutexLock(t *testing.T) {
+	var mu FDMutex
+
+	if !mu.Incref() {
+		t.Fatal("broken")
+	}
+	if mu.Decref() {
+		t.Fatal("broken")
+	}
+
+	if !mu.RWLock(true) {
+		t.Fatal("broken")
+	}
+	if mu.RWUnlock(true) {
+		t.Fatal("broken")
+	}
+
+	if !mu.RWLock(false) {
+		t.Fatal("broken")
+	}
+	if mu.RWUnlock(false) {
+		t.Fatal("broken")
+	}
+}
+
+// TestMutexClose checks that once IncrefAndClose has succeeded,
+// Incref, RWLock in both modes and a second IncrefAndClose all fail.
+func TestMutexClose(t *testing.T) {
+	var mu FDMutex
+	if !mu.IncrefAndClose() {
+		t.Fatal("broken")
+	}
+
+	if mu.Incref() {
+		t.Fatal("broken")
+	}
+	if mu.RWLock(true) {
+		t.Fatal("broken")
+	}
+	if mu.RWLock(false) {
+		t.Fatal("broken")
+	}
+	if mu.IncrefAndClose() {
+		t.Fatal("broken")
+	}
+}
+
+// TestMutexCloseUnblock checks that goroutines blocked in RWLock(true)
+// stay blocked while the read lock is held, and that IncrefAndClose
+// releases all of them with a false result.
+func TestMutexCloseUnblock(t *testing.T) {
+	c := make(chan bool, 4)
+	var mu FDMutex
+	mu.RWLock(true)
+	for i := 0; i < 4; i++ {
+		go func() {
+			if mu.RWLock(true) {
+				t.Error("broken")
+				return
+			}
+			c <- true
+		}()
+	}
+	// Concurrent goroutines must not be able to read lock the mutex.
+	time.Sleep(time.Millisecond)
+	select {
+	case <-c:
+		t.Fatal("broken")
+	default:
+	}
+	mu.IncrefAndClose() // Must unblock the readers.
+	for i := 0; i < 4; i++ {
+		select {
+		case <-c:
+		case <-time.After(10 * time.Second):
+			t.Fatal("broken")
+		}
+	}
+	// Dropping the reference taken by IncrefAndClose must report false,
+	// and releasing the read lock taken at the top must report true.
+	if mu.Decref() {
+		t.Fatal("broken")
+	}
+	if !mu.RWUnlock(true) {
+		t.Fatal("broken")
+	}
+}
+
+// TestMutexPanic checks that releasing an FDMutex more often than it
+// was acquired panics, and that the mutex can still be acquired and
+// released normally afterwards.
+func TestMutexPanic(t *testing.T) {
+	// ensurePanics fails the test unless f panics.
+	ensurePanics := func(f func()) {
+		defer func() {
+			if recover() == nil {
+				t.Fatal("does not panic")
+			}
+		}()
+		f()
+	}
+
+	var mu FDMutex
+	ensurePanics(func() { mu.Decref() })
+	ensurePanics(func() { mu.RWUnlock(true) })
+	ensurePanics(func() { mu.RWUnlock(false) })
+
+	ensurePanics(func() { mu.Incref(); mu.Decref(); mu.Decref() })
+	ensurePanics(func() { mu.RWLock(true); mu.RWUnlock(true); mu.RWUnlock(true) })
+	ensurePanics(func() { mu.RWLock(false); mu.RWUnlock(false); mu.RWUnlock(false) })
+
+	// ensure that it's still not broken
+	mu.Incref()
+	mu.Decref()
+	mu.RWLock(true)
+	mu.RWUnlock(true)
+	mu.RWLock(false)
+	mu.RWUnlock(false)
+}
+
+// TestMutexOverflowPanic checks that taking references without ever
+// releasing them eventually panics with a string value that contains
+// "too many" and does not contain "inconsistent".
+func TestMutexOverflowPanic(t *testing.T) {
+	defer func() {
+		r := recover()
+		if r == nil {
+			t.Fatal("did not panic")
+		}
+		msg, ok := r.(string)
+		if !ok {
+			t.Fatalf("unexpected panic type %T", r)
+		}
+		if !strings.Contains(msg, "too many") || strings.Contains(msg, "inconsistent") {
+			t.Fatalf("wrong panic message %q", msg)
+		}
+	}()
+
+	var mu1 FDMutex
+	// The loop is expected to panic before it reaches its bound.
+	for i := 0; i < 1<<21; i++ {
+		mu1.Incref()
+	}
+}
+
+// TestMutexStress runs P goroutines that each perform N randomly
+// chosen operations (Incref/Decref, read lock/unlock, write
+// lock/unlock) on one shared FDMutex. While a lock is held, the
+// goroutine checks and then bumps a pair of counters that only stay
+// equal if holders of that lock exclude one another. Afterwards the
+// mutex is closed and its last reference dropped.
+func TestMutexStress(t *testing.T) {
+	P := 8
+	N := int(1e6)
+	if testing.Short() {
+		P = 4
+		N = 1e4
+	}
+	// Set GOMAXPROCS to P for the test and restore the old value on return.
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
+	done := make(chan bool, P)
+	var mu FDMutex
+	var readState [2]uint64
+	var writeState [2]uint64
+	for p := 0; p < P; p++ {
+		go func() {
+			// Report whether the test is still passing when this
+			// goroutine finishes.
+			defer func() {
+				done <- !t.Failed()
+			}()
+			r := rand.New(rand.NewSource(rand.Int63()))
+			for i := 0; i < N; i++ {
+				switch r.Intn(3) {
+				case 0:
+					if !mu.Incref() {
+						t.Error("broken")
+						return
+					}
+					if mu.Decref() {
+						t.Error("broken")
+						return
+					}
+				case 1:
+					if !mu.RWLock(true) {
+						t.Error("broken")
+						return
+					}
+					// Ensure that it provides mutual exclusion for readers.
+					if readState[0] != readState[1] {
+						t.Error("broken")
+						return
+					}
+					readState[0]++
+					readState[1]++
+					if mu.RWUnlock(true) {
+						t.Error("broken")
+						return
+					}
+				case 2:
+					if !mu.RWLock(false) {
+						t.Error("broken")
+						return
+					}
+					// Ensure that it provides mutual exclusion for writers.
+					if writeState[0] != writeState[1] {
+						t.Error("broken")
+						return
+					}
+					writeState[0]++
+					writeState[1]++
+					if mu.RWUnlock(false) {
+						t.Error("broken")
+						return
+					}
+				}
+			}
+		}()
+	}
+	for p := 0; p < P; p++ {
+		if !<-done {
+			t.FailNow()
+		}
+	}
+	// With all goroutines finished, closing must succeed and dropping
+	// the reference it took must report the last reference.
+	if !mu.IncrefAndClose() {
+		t.Fatal("broken")
+	}
+	if !mu.Decref() {
+		t.Fatal("broken")
+	}
+}
diff --git a/src/internal/poll/fd_opendir_darwin.go b/src/internal/poll/fd_opendir_darwin.go
new file mode 100644
index 0000000..8eb770c
--- /dev/null
+++ b/src/internal/poll/fd_opendir_darwin.go
@@ -0,0 +1,38 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"syscall"
+	_ "unsafe" // for go:linkname
+)
+
+// OpenDir returns a pointer to a DIR structure suitable for
+// ReadDir. In case of an error, the name of the failed
+// syscall is returned along with a syscall.Errno.
+func (fd *FD) OpenDir() (uintptr, string, error) {
+	// fdopendir(3) takes control of the file descriptor,
+	// so hand it a duplicate rather than fd itself.
+	dupFD, call, err := fd.Dup()
+	if err != nil {
+		return 0, call, err
+	}
+	dir, err := fdopendir(dupFD)
+	for err == syscall.EINTR {
+		// Interrupted: try again.
+		dir, err = fdopendir(dupFD)
+	}
+	if err != nil {
+		// The duplicate was not taken over, so close it here.
+		syscall.Close(dupFD)
+		return 0, "fdopendir", err
+	}
+	return dir, "", nil
+}
+
+// Implemented in syscall/syscall_darwin.go.
+//go:linkname fdopendir syscall.fdopendir
+func fdopendir(fd int) (dir uintptr, err error)
diff --git a/src/internal/poll/fd_plan9.go b/src/internal/poll/fd_plan9.go
new file mode 100644
index 0000000..0b5b937
--- /dev/null
+++ b/src/internal/poll/fd_plan9.go
@@ -0,0 +1,233 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"errors"
+	"io"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// atomicBool is a boolean flag stored in an int32 and accessed only
+// through sync/atomic loads and stores. Zero means false; setTrue
+// stores 1.
+type atomicBool int32
+
+func (b *atomicBool) isSet() bool { return atomic.LoadInt32((*int32)(b)) != 0 }
+func (b *atomicBool) setFalse()   { atomic.StoreInt32((*int32)(b), 0) }
+func (b *atomicBool) setTrue()    { atomic.StoreInt32((*int32)(b), 1) }
+
+// FD is the Plan 9 descriptor type of this package. It carries the
+// locking and deadline state used by Read, Write and the SetDeadline
+// methods; the I/O itself is done by functions passed in by the caller.
+type FD struct {
+	// Lock sysfd and serialize access to Read and Write methods.
+	fdmu fdMutex
+
+	// Destroy, if non-nil, is called by destroy.
+	Destroy func()
+
+	// deadlines
+	//
+	// raio and waio hold the in-flight read and write operations, if
+	// any; rtimer and wtimer hold the pending deadline timers, if any.
+	// rmu and wmu are taken by Read, Write and setDeadlineImpl around
+	// the read-side and write-side deadline state respectively.
+	rmu       sync.Mutex
+	wmu       sync.Mutex
+	raio      *asyncIO
+	waio      *asyncIO
+	rtimer    *time.Timer
+	wtimer    *time.Timer
+	rtimedout atomicBool // set true when read deadline has been reached
+	wtimedout atomicBool // set true when write deadline has been reached
+
+	// Whether this is a normal file.
+	// On Plan 9 we do not use this package for ordinary files,
+	// so this is always false, but the field is present because
+	// shared code in fd_mutex.go checks it.
+	isFile bool
+}
+
+// destroy runs the Destroy hook, if one is set, and returns nil.
+// We need this to close out a file descriptor when it is unlocked,
+// but the real implementation has to live in the net package because
+// it uses os.File's.
+func (fd *FD) destroy() error {
+	if hook := fd.Destroy; hook != nil {
+		hook()
+	}
+	return nil
+}
+
+// Close handles the locking for closing an FD. The real operation
+// is in the net package. It returns the closing error for fd if fd
+// was already closed.
+func (fd *FD) Close() error {
+	if fd.fdmu.increfAndClose() {
+		return nil
+	}
+	return errClosing(fd.isFile)
+}
+
+// Read implements io.Reader.
+//
+// The read itself is performed by fn, which is run asynchronously so
+// that an expiring read deadline can cancel it. Read returns
+// ErrDeadlineExceeded if the read deadline has already been reached
+// or if the operation is interrupted, and io.EOF if the operation
+// fails with a hangup error. A zero-length b returns 0, nil without
+// calling fn.
+func (fd *FD) Read(fn func([]byte) (int, error), b []byte) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+	if len(b) == 0 {
+		return 0, nil
+	}
+	fd.rmu.Lock()
+	if fd.rtimedout.isSet() {
+		fd.rmu.Unlock()
+		return 0, ErrDeadlineExceeded
+	}
+	aio := newAsyncIO(fn, b)
+	fd.raio = aio
+	fd.rmu.Unlock()
+	// Wait on the local aio rather than re-reading fd.raio outside rmu.
+	n, err := aio.Wait()
+	// Clear raio under rmu: the deadline timer and setDeadlineImpl
+	// read fd.raio while holding rmu, and must not see it become nil
+	// between their nil check and their call to Cancel.
+	fd.rmu.Lock()
+	fd.raio = nil
+	fd.rmu.Unlock()
+	if isHangup(err) {
+		err = io.EOF
+	}
+	if isInterrupted(err) {
+		err = ErrDeadlineExceeded
+	}
+	return n, err
+}
+
+// Write implements io.Writer.
+//
+// The write itself is performed by fn, which is run asynchronously so
+// that an expiring write deadline can cancel it. Write returns
+// ErrDeadlineExceeded if the write deadline has already been reached
+// or if the operation is interrupted.
+func (fd *FD) Write(fn func([]byte) (int, error), b []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	fd.wmu.Lock()
+	if fd.wtimedout.isSet() {
+		fd.wmu.Unlock()
+		return 0, ErrDeadlineExceeded
+	}
+	aio := newAsyncIO(fn, b)
+	fd.waio = aio
+	fd.wmu.Unlock()
+	// Wait on the local aio rather than re-reading fd.waio outside wmu.
+	n, err := aio.Wait()
+	// Clear waio under wmu: the deadline timer and setDeadlineImpl
+	// read fd.waio while holding wmu, and must not see it become nil
+	// between their nil check and their call to Cancel.
+	fd.wmu.Lock()
+	fd.waio = nil
+	fd.wmu.Unlock()
+	if isInterrupted(err) {
+		err = ErrDeadlineExceeded
+	}
+	return n, err
+}
+
+// SetDeadline sets the read and write deadlines associated with fd.
+// It calls setDeadlineImpl with mode 'r'+'w'.
+func (fd *FD) SetDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r'+'w')
+}
+
+// SetReadDeadline sets the read deadline associated with fd.
+// It calls setDeadlineImpl with mode 'r'.
+func (fd *FD) SetReadDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r')
+}
+
+// SetWriteDeadline sets the write deadline associated with fd.
+// It calls setDeadlineImpl with mode 'w'.
+func (fd *FD) SetWriteDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'w')
+}
+
+// setDeadlineImpl sets the read deadline, the write deadline, or both,
+// as selected by mode ('r', 'w' or 'r'+'w').
+//
+// A zero t clears the deadline. A t in the past marks the selected
+// direction as timed out and cancels any I/O in flight. Otherwise a
+// timer is armed that does the same once t is reached. Any timer left
+// over from an earlier call is stopped first, so that a deadline that
+// has been replaced can no longer fire. It always returns nil.
+func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
+	d := t.Sub(time.Now())
+	if mode == 'r' || mode == 'r'+'w' {
+		fd.rmu.Lock()
+		defer fd.rmu.Unlock()
+		if fd.rtimer != nil {
+			fd.rtimer.Stop()
+			fd.rtimer = nil
+		}
+		fd.rtimedout.setFalse()
+	}
+	if mode == 'w' || mode == 'r'+'w' {
+		fd.wmu.Lock()
+		defer fd.wmu.Unlock()
+		if fd.wtimer != nil {
+			fd.wtimer.Stop()
+			fd.wtimer = nil
+		}
+		fd.wtimedout.setFalse()
+	}
+	if !t.IsZero() && d >= 0 {
+		// Interrupt I/O operation once timer has expired
+		if mode == 'r' || mode == 'r'+'w' {
+			var timer *time.Timer
+			timer = time.AfterFunc(d, func() {
+				fd.rmu.Lock()
+				defer fd.rmu.Unlock()
+				if fd.rtimer != timer {
+					// Stop came too late to prevent this callback,
+					// but the deadline has been changed since.
+					return
+				}
+				fd.rtimedout.setTrue()
+				if fd.raio != nil {
+					fd.raio.Cancel()
+				}
+			})
+			fd.rtimer = timer
+		}
+		if mode == 'w' || mode == 'r'+'w' {
+			var timer *time.Timer
+			timer = time.AfterFunc(d, func() {
+				fd.wmu.Lock()
+				defer fd.wmu.Unlock()
+				if fd.wtimer != timer {
+					// Stop came too late to prevent this callback,
+					// but the deadline has been changed since.
+					return
+				}
+				fd.wtimedout.setTrue()
+				if fd.waio != nil {
+					fd.waio.Cancel()
+				}
+			})
+			fd.wtimer = timer
+		}
+	}
+	if !t.IsZero() && d < 0 {
+		// Interrupt current I/O operation
+		if mode == 'r' || mode == 'r'+'w' {
+			fd.rtimedout.setTrue()
+			if fd.raio != nil {
+				fd.raio.Cancel()
+			}
+		}
+		if mode == 'w' || mode == 'r'+'w' {
+			fd.wtimedout.setTrue()
+			if fd.waio != nil {
+				fd.waio.Cancel()
+			}
+		}
+	}
+	return nil
+}
+
+// On Plan 9 only, expose the locking for the net code.
+
+// ReadLock wraps FD.readLock, returning its error unchanged.
+func (fd *FD) ReadLock() error {
+	return fd.readLock()
+}
+
+// ReadUnlock wraps FD.readUnlock.
+func (fd *FD) ReadUnlock() {
+	fd.readUnlock()
+}
+
+// isHangup reports whether err is non-nil and its message ends
+// with "Hangup".
+func isHangup(err error) bool {
+	if err == nil {
+		return false
+	}
+	return stringsHasSuffix(err.Error(), "Hangup")
+}
+
+// isInterrupted reports whether err is non-nil and its message ends
+// with "interrupted".
+func isInterrupted(err error) bool {
+	if err == nil {
+		return false
+	}
+	return stringsHasSuffix(err.Error(), "interrupted")
+}
+
+// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
+// This is only used for testing.
+// This implementation always reports false.
+func IsPollDescriptor(fd uintptr) bool {
+	return false
+}
+
+// RawControl invokes the user-defined function f for a non-IO
+// operation.
+// It is not implemented here: f is never called and an error is
+// always returned.
+func (fd *FD) RawControl(f func(uintptr)) error {
+	return errors.New("not implemented")
+}
+
+// RawRead invokes the user-defined function f for a read operation.
+// It is not implemented here: f is never called and an error is
+// always returned.
+func (fd *FD) RawRead(f func(uintptr) bool) error {
+	return errors.New("not implemented")
+}
+
+// RawWrite invokes the user-defined function f for a write operation.
+// It is not implemented here: f is never called and an error is
+// always returned.
+func (fd *FD) RawWrite(f func(uintptr) bool) error {
+	return errors.New("not implemented")
+}
diff --git a/src/internal/poll/fd_poll_js.go b/src/internal/poll/fd_poll_js.go
new file mode 100644
index 0000000..760e248
--- /dev/null
+++ b/src/internal/poll/fd_poll_js.go
@@ -0,0 +1,100 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build js && wasm
+// +build js,wasm
+
+package poll
+
+import (
+	"syscall"
+	"time"
+)
+
+// pollDesc is the js/wasm poll descriptor. It records the FD it was
+// initialised with and whether evict has been called on it.
+type pollDesc struct {
+	fd      *FD
+	closing bool
+}
+
+// init records fd in pd. It always returns nil.
+func (pd *pollDesc) init(fd *FD) error { pd.fd = fd; return nil }
+
+// close does nothing in this implementation.
+func (pd *pollDesc) close() {}
+
+// evict marks pd as closing and, if pd has been initialised with an
+// FD, stops the I/O pending on that FD's descriptor.
+func (pd *pollDesc) evict() {
+	pd.closing = true
+	if pd.fd == nil {
+		return
+	}
+	syscall.StopIO(pd.fd.Sysfd)
+}
+
+// prepare returns the closing error for isFile if pd has been
+// evicted, and nil otherwise. mode is not used.
+func (pd *pollDesc) prepare(mode int, isFile bool) error {
+	if !pd.closing {
+		return nil
+	}
+	return errClosing(isFile)
+}
+
+// prepareRead calls prepare with mode 'r'.
+func (pd *pollDesc) prepareRead(isFile bool) error { return pd.prepare('r', isFile) }
+
+// prepareWrite calls prepare with mode 'w'.
+func (pd *pollDesc) prepareWrite(isFile bool) error { return pd.prepare('w', isFile) }
+
+// wait never blocks. It returns the closing error for isFile if pd
+// has been evicted, nil for files, and ErrDeadlineExceeded for
+// everything else. mode is not used.
+func (pd *pollDesc) wait(mode int, isFile bool) error {
+	switch {
+	case pd.closing:
+		return errClosing(isFile)
+	case isFile: // TODO(neelance): wasm: Use callbacks from JS to block until the read/write finished.
+		return nil
+	default:
+		return ErrDeadlineExceeded
+	}
+}
+
+// waitRead calls wait with mode 'r'.
+func (pd *pollDesc) waitRead(isFile bool) error { return pd.wait('r', isFile) }
+
+// waitWrite calls wait with mode 'w'.
+func (pd *pollDesc) waitWrite(isFile bool) error { return pd.wait('w', isFile) }
+
+// waitCanceled does nothing in this implementation.
+func (pd *pollDesc) waitCanceled(mode int) {}
+
+// pollable always reports true in this implementation.
+func (pd *pollDesc) pollable() bool { return true }
+
+// SetDeadline sets the read and write deadlines associated with fd.
+// It calls setDeadlineImpl with mode 'r'+'w'.
+func (fd *FD) SetDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r'+'w')
+}
+
+// SetReadDeadline sets the read deadline associated with fd.
+// It calls setDeadlineImpl with mode 'r'.
+func (fd *FD) SetReadDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r')
+}
+
+// SetWriteDeadline sets the write deadline associated with fd.
+// It calls setDeadlineImpl with mode 'w'.
+func (fd *FD) SetWriteDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'w')
+}
+
+// setDeadlineImpl passes the deadline t, as nanoseconds since the Unix
+// epoch, to the syscall package for the direction or directions
+// selected by mode ('r', 'w' or 'r'+'w'). A zero t is passed as 0.
+// It returns an error only if a reference to fd cannot be taken.
+func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
+	var d int64
+	if !t.IsZero() {
+		d = t.UnixNano()
+	}
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	if mode == 'r' || mode == 'r'+'w' {
+		syscall.SetReadDeadline(fd.Sysfd, d)
+	}
+	if mode == 'w' || mode == 'r'+'w' {
+		syscall.SetWriteDeadline(fd.Sysfd, d)
+	}
+	fd.decref()
+	return nil
+}
+
+// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
+// This is only used for testing.
+// This implementation always reports false.
+func IsPollDescriptor(fd uintptr) bool {
+	return false
+}
diff --git a/src/internal/poll/fd_poll_runtime.go b/src/internal/poll/fd_poll_runtime.go
new file mode 100644
index 0000000..b072af0
--- /dev/null
+++ b/src/internal/poll/fd_poll_runtime.go
@@ -0,0 +1,169 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || windows || solaris
+// +build aix darwin dragonfly freebsd linux netbsd openbsd windows solaris
+
+package poll
+
+import (
+	"errors"
+	"sync"
+	"syscall"
+	"time"
+	_ "unsafe" // for go:linkname
+)
+
+// runtimeNano returns the current value of the runtime clock in nanoseconds.
+//go:linkname runtimeNano runtime.nanotime
+func runtimeNano() int64
+
+func runtime_pollServerInit()
+func runtime_pollOpen(fd uintptr) (uintptr, int)
+func runtime_pollClose(ctx uintptr)
+func runtime_pollWait(ctx uintptr, mode int) int
+func runtime_pollWaitCanceled(ctx uintptr, mode int) int
+func runtime_pollReset(ctx uintptr, mode int) int
+func runtime_pollSetDeadline(ctx uintptr, d int64, mode int)
+func runtime_pollUnblock(ctx uintptr)
+func runtime_isPollServerDescriptor(fd uintptr) bool
+
+// pollDesc holds the context value returned by runtime_pollOpen for
+// a descriptor. A zero runtimeCtx means that no context is currently
+// held.
+type pollDesc struct {
+	runtimeCtx uintptr
+}
+
+var serverInit sync.Once
+
+// init passes fd.Sysfd to runtime_pollOpen and stores the returned
+// context in pd. The first call in the process also runs
+// runtime_pollServerInit. If runtime_pollOpen reports a non-zero
+// errno, init returns it as an error and leaves pd unchanged.
+func (pd *pollDesc) init(fd *FD) error {
+	serverInit.Do(runtime_pollServerInit)
+	ctx, errno := runtime_pollOpen(uintptr(fd.Sysfd))
+	if errno != 0 {
+		return errnoErr(syscall.Errno(errno))
+	}
+	pd.runtimeCtx = ctx
+	return nil
+}
+
+// close hands pd's runtime context to runtime_pollClose and then
+// zeroes it. It does nothing if pd holds no context.
+func (pd *pollDesc) close() {
+	if ctx := pd.runtimeCtx; ctx != 0 {
+		runtime_pollClose(ctx)
+		pd.runtimeCtx = 0
+	}
+}
+
+// evict evicts fd from the pending list, unblocking any I/O running on fd.
+// It does nothing if pd holds no runtime context.
+func (pd *pollDesc) evict() {
+	if pd.runtimeCtx != 0 {
+		runtime_pollUnblock(pd.runtimeCtx)
+	}
+}
+
+// prepare calls runtime_pollReset for the given mode and converts its
+// result to an error with convertErr. It returns nil without calling
+// the runtime if pd holds no runtime context.
+func (pd *pollDesc) prepare(mode int, isFile bool) error {
+	ctx := pd.runtimeCtx
+	if ctx == 0 {
+		return nil
+	}
+	return convertErr(runtime_pollReset(ctx, mode), isFile)
+}
+
+// prepareRead calls prepare with mode 'r'.
+func (pd *pollDesc) prepareRead(isFile bool) error {
+	return pd.prepare('r', isFile)
+}
+
+// prepareWrite calls prepare with mode 'w'.
+func (pd *pollDesc) prepareWrite(isFile bool) error {
+	return pd.prepare('w', isFile)
+}
+
+// wait calls runtime_pollWait for the given mode and converts its
+// result to an error with convertErr. It returns an error without
+// calling the runtime if pd holds no runtime context.
+func (pd *pollDesc) wait(mode int, isFile bool) error {
+	ctx := pd.runtimeCtx
+	if ctx == 0 {
+		return errors.New("waiting for unsupported file type")
+	}
+	return convertErr(runtime_pollWait(ctx, mode), isFile)
+}
+
+// waitRead calls wait with mode 'r'.
+func (pd *pollDesc) waitRead(isFile bool) error {
+	return pd.wait('r', isFile)
+}
+
+// waitWrite calls wait with mode 'w'.
+func (pd *pollDesc) waitWrite(isFile bool) error {
+	return pd.wait('w', isFile)
+}
+
+// waitCanceled calls runtime_pollWaitCanceled for the given mode.
+// It does nothing if pd holds no runtime context.
+func (pd *pollDesc) waitCanceled(mode int) {
+	if ctx := pd.runtimeCtx; ctx != 0 {
+		runtime_pollWaitCanceled(ctx, mode)
+	}
+}
+
+// pollable reports whether pd holds a non-zero runtime context.
+func (pd *pollDesc) pollable() bool {
+	return pd.runtimeCtx != 0
+}
+
+// Error values returned by runtime_pollReset and runtime_pollWait.
+// These must match the values in runtime/netpoll.go.
+const (
+	pollNoError        = 0
+	pollErrClosing     = 1
+	pollErrTimeout     = 2
+	pollErrNotPollable = 3
+)
+
+// convertErr maps a result code from runtime_pollReset or
+// runtime_pollWait to the corresponding error value, using isFile to
+// pick the closing error. It prints the code and panics if the code
+// is not one of the poll* constants.
+func convertErr(res int, isFile bool) error {
+	switch res {
+	case pollNoError:
+		return nil
+	case pollErrClosing:
+		return errClosing(isFile)
+	case pollErrTimeout:
+		return ErrDeadlineExceeded
+	case pollErrNotPollable:
+		return ErrNotPollable
+	default:
+		println("unreachable: ", res)
+		panic("unreachable")
+	}
+}
+
+// SetDeadline sets the read and write deadlines associated with fd.
+// It calls setDeadlineImpl with mode 'r'+'w'.
+func (fd *FD) SetDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r'+'w')
+}
+
+// SetReadDeadline sets the read deadline associated with fd.
+// It calls setDeadlineImpl with mode 'r'.
+func (fd *FD) SetReadDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'r')
+}
+
+// SetWriteDeadline sets the write deadline associated with fd.
+// It calls setDeadlineImpl with mode 'w'.
+func (fd *FD) SetWriteDeadline(t time.Time) error {
+	return setDeadlineImpl(fd, t, 'w')
+}
+
+// setDeadlineImpl hands the deadline t to the runtime poller for the
+// given mode, as a duration measured from now. A zero t is passed
+// as 0. It returns the closing error if a reference to fd cannot be
+// taken, and ErrNoDeadline if fd has no runtime poll context.
+func setDeadlineImpl(fd *FD, t time.Time, mode int) error {
+	d := int64(0)
+	if !t.IsZero() {
+		if d = int64(time.Until(t)); d == 0 {
+			d = -1 // don't confuse deadline right now with no deadline
+		}
+	}
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	ctx := fd.pd.runtimeCtx
+	if ctx == 0 {
+		return ErrNoDeadline
+	}
+	runtime_pollSetDeadline(ctx, d, mode)
+	return nil
+}
+
+// IsPollDescriptor reports whether fd is the descriptor being used by the poller.
+// This is only used for testing.
+// The answer comes from runtime_isPollServerDescriptor.
+func IsPollDescriptor(fd uintptr) bool {
+	return runtime_isPollServerDescriptor(fd)
+}
diff --git a/src/internal/poll/fd_posix.go b/src/internal/poll/fd_posix.go
new file mode 100644
index 0000000..487f328
--- /dev/null
+++ b/src/internal/poll/fd_posix.go
@@ -0,0 +1,80 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || windows
+// +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris windows
+
+package poll
+
+import (
+	"io"
+	"syscall"
+)
+
+// eofError returns io.EOF when n == 0, err == nil, and fd.ZeroReadIsEOF
+// is set; otherwise it returns err unchanged.
+func (fd *FD) eofError(n int, err error) error {
+	if n == 0 && err == nil && fd.ZeroReadIsEOF {
+		return io.EOF
+	}
+	return err
+}
+
+// Shutdown wraps syscall.Shutdown.
+func (fd *FD) Shutdown(how int) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Shutdown(fd.Sysfd, how)
+}
+
+// Fchown wraps syscall.Fchown.
+func (fd *FD) Fchown(uid, gid int) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fchown(fd.Sysfd, uid, gid)
+	})
+}
+
+// Ftruncate wraps syscall.Ftruncate.
+func (fd *FD) Ftruncate(size int64) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Ftruncate(fd.Sysfd, size)
+	})
+}
+
+// RawControl invokes the user-defined function f for a non-IO
+// operation.
+func (fd *FD) RawControl(f func(uintptr)) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	f(uintptr(fd.Sysfd))
+	return nil
+}
+
+// ignoringEINTR makes a function call and repeats it if it returns
+// an EINTR error. This appears to be required even though we install all
+// signal handlers with SA_RESTART: see #22838, #38033, #38836, #40846.
+// Also #20400 and #36644 are issues in which a signal handler is
+// installed without setting SA_RESTART. None of these are the common case,
+// but there are enough of them that it seems that we can't avoid
+// an EINTR loop.
+func ignoringEINTR(fn func() error) error {
+	for {
+		err := fn()
+		if err != syscall.EINTR {
+			return err
+		}
+	}
+}
diff --git a/src/internal/poll/fd_posix_test.go b/src/internal/poll/fd_posix_test.go
new file mode 100644
index 0000000..1dcf51d
--- /dev/null
+++ b/src/internal/poll/fd_posix_test.go
@@ -0,0 +1,44 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows
+
+package poll_test
+
+import (
+	. "internal/poll"
+	"io"
+	"testing"
+)
+
+var eofErrorTests = []struct {
+	n        int
+	err      error
+	fd       *FD
+	expected error
+}{
+	{100, nil, &FD{ZeroReadIsEOF: true}, nil},
+	{100, io.EOF, &FD{ZeroReadIsEOF: true}, io.EOF},
+	{100, ErrNetClosing, &FD{ZeroReadIsEOF: true}, ErrNetClosing},
+	{0, nil, &FD{ZeroReadIsEOF: true}, io.EOF},
+	{0, io.EOF, &FD{ZeroReadIsEOF: true}, io.EOF},
+	{0, ErrNetClosing, &FD{ZeroReadIsEOF: true}, ErrNetClosing},
+
+	{100, nil, &FD{ZeroReadIsEOF: false}, nil},
+	{100, io.EOF, &FD{ZeroReadIsEOF: false}, io.EOF},
+	{100, ErrNetClosing, &FD{ZeroReadIsEOF: false}, ErrNetClosing},
+	{0, nil, &FD{ZeroReadIsEOF: false}, nil},
+	{0, io.EOF, &FD{ZeroReadIsEOF: false}, io.EOF},
+	{0, ErrNetClosing, &FD{ZeroReadIsEOF: false}, ErrNetClosing},
+}
+
+func TestEOFError(t *testing.T) {
+	for _, tt := range eofErrorTests {
+		actual := tt.fd.EOFError(tt.n, tt.err)
+		if actual != tt.expected {
+			t.Errorf("eofError(%v, %v, %v): expected %v, actual %v", tt.n, tt.err, tt.fd.ZeroReadIsEOF, tt.expected, actual)
+		}
+	}
+}
diff --git a/src/internal/poll/fd_unix.go b/src/internal/poll/fd_unix.go
new file mode 100644
index 0000000..3b17cd2
--- /dev/null
+++ b/src/internal/poll/fd_unix.go
@@ -0,0 +1,587 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris
+// +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris
+
+package poll
+
+import (
+	"io"
+	"sync/atomic"
+	"syscall"
+)
+
+// FD is a file descriptor. The net and os packages use this type as a
+// field of a larger type representing a network connection or OS file.
+type FD struct {
+	// Lock sysfd and serialize access to Read and Write methods.
+	fdmu fdMutex
+
+	// System file descriptor. Immutable until Close.
+	Sysfd int
+
+	// I/O poller.
+	pd pollDesc
+
+	// Writev cache.
+	iovecs *[]syscall.Iovec
+
+	// Semaphore signaled when file is closed.
+	csema uint32
+
+	// Non-zero if this file has been set to blocking mode.
+	isBlocking uint32
+
+	// Whether this is a streaming descriptor, as opposed to a
+	// packet-based descriptor like a UDP socket. Immutable.
+	IsStream bool
+
+	// Whether a zero byte read indicates EOF. This is false for a
+	// message based socket connection.
+	ZeroReadIsEOF bool
+
+	// Whether this is a file rather than a network socket.
+	isFile bool
+}
+
+// Init initializes the FD. The Sysfd field should already be set.
+// This can be called multiple times on a single FD.
+// The net argument is a network name from the net package (e.g., "tcp"),
+// or "file".
+// Set pollable to true if fd should be managed by runtime netpoll.
+func (fd *FD) Init(net string, pollable bool) error {
+	// We don't actually care about the various network types.
+	if net == "file" {
+		fd.isFile = true
+	}
+	if !pollable {
+		fd.isBlocking = 1
+		return nil
+	}
+	err := fd.pd.init(fd)
+	if err != nil {
+		// If we could not initialize the runtime poller,
+		// assume we are using blocking mode.
+		fd.isBlocking = 1
+	}
+	return err
+}
+
+// destroy closes the file descriptor. This is called when there are
+// no remaining references.
+func (fd *FD) destroy() error {
+	// Poller may want to unregister fd in readiness notification mechanism,
+	// so this must be executed before CloseFunc.
+	fd.pd.close()
+
+	// We don't use ignoringEINTR here because POSIX does not define
+	// whether the descriptor is closed if close returns EINTR.
+	// If the descriptor is indeed closed, using a loop would race
+	// with some other goroutine opening a new descriptor.
+	// (The Linux kernel guarantees that it is closed on an EINTR error.)
+	err := CloseFunc(fd.Sysfd)
+
+	fd.Sysfd = -1
+	runtime_Semrelease(&fd.csema)
+	return err
+}
+
+// Close closes the FD. The underlying file descriptor is closed by the
+// destroy method when there are no remaining references.
+func (fd *FD) Close() error {
+	if !fd.fdmu.increfAndClose() {
+		return errClosing(fd.isFile)
+	}
+
+	// Unblock any I/O.  Once it all unblocks and returns,
+	// so that it cannot be referring to fd.Sysfd anymore,
+	// the final decref will close fd.Sysfd. This should happen
+	// fairly quickly, since all the I/O is non-blocking, and any
+	// attempts to block in the pollDesc will return errClosing(fd.isFile).
+	fd.pd.evict()
+
+	// The call to decref will call destroy if there are no other
+	// references.
+	err := fd.decref()
+
+	// Wait until the descriptor is closed. If this was the only
+	// reference, it is already closed. Only wait if the file has
+	// not been set to blocking mode, as otherwise any current I/O
+	// may be blocking, and that would block the Close.
+	// No need for an atomic read of isBlocking, increfAndClose means
+	// we have exclusive access to fd.
+	if fd.isBlocking == 0 {
+		runtime_Semacquire(&fd.csema)
+	}
+
+	return err
+}
+
+// SetBlocking puts the file into blocking mode.
+func (fd *FD) SetBlocking() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	// Atomic store so that concurrent calls to SetBlocking
+	// do not cause a race condition. isBlocking only ever goes
+	// from 0 to 1 so there is no real race here.
+	atomic.StoreUint32(&fd.isBlocking, 1)
+	return syscall.SetNonblock(fd.Sysfd, false)
+}
+
+// Darwin and FreeBSD can't read or write 2GB+ files at a time,
+// even on 64-bit systems.
+// The same is true of socket implementations on many systems.
+// See golang.org/issue/7812 and golang.org/issue/16266.
+// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
+const maxRW = 1 << 30
+
+// Read implements io.Reader.
+func (fd *FD) Read(p []byte) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+	if len(p) == 0 {
+		// If the caller wanted a zero byte read, return immediately
+		// without trying (but after acquiring the readLock).
+		// Otherwise syscall.Read returns 0, nil which looks like
+		// io.EOF.
+		// TODO(bradfitz): make it wait for readability? (Issue 15735)
+		return 0, nil
+	}
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, err
+	}
+	if fd.IsStream && len(p) > maxRW {
+		p = p[:maxRW]
+	}
+	for {
+		n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
+		if err != nil {
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, err
+	}
+}
+
+// Pread wraps the pread system call.
+func (fd *FD) Pread(p []byte, off int64) (int, error) {
+	// Call incref, not readLock, because since pread specifies the
+	// offset it is independent from other reads.
+	// Similarly, using the poller doesn't make sense for pread.
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	if fd.IsStream && len(p) > maxRW {
+		p = p[:maxRW]
+	}
+	var (
+		n   int
+		err error
+	)
+	for {
+		n, err = syscall.Pread(fd.Sysfd, p, off)
+		if err != syscall.EINTR {
+			break
+		}
+	}
+	if err != nil {
+		n = 0
+	}
+	fd.decref()
+	err = fd.eofError(n, err)
+	return n, err
+}
+
+// ReadFrom wraps the recvfrom network call.
+func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, nil, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, nil, err
+	}
+	for {
+		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, sa, err
+	}
+}
+
+// ReadMsg wraps the recvmsg network call.
+func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, 0, 0, nil, err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return 0, 0, 0, nil, err
+	}
+	for {
+		n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
+		if err != nil {
+			if err == syscall.EINTR {
+				continue
+			}
+			// TODO(dfc) should n and oobn be set to 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		err = fd.eofError(n, err)
+		return n, oobn, sysflags, sa, err
+	}
+}
+
+// Write implements io.Writer.
+func (fd *FD) Write(p []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+	var nn int
+	for {
+		max := len(p)
+		if fd.IsStream && max-nn > maxRW {
+			max = nn + maxRW
+		}
+		n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
+		if n > 0 {
+			nn += n
+		}
+		if nn == len(p) {
+			return nn, err
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return nn, err
+		}
+		if n == 0 {
+			return nn, io.ErrUnexpectedEOF
+		}
+	}
+}
+
+// Pwrite wraps the pwrite system call.
+func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
+	// Call incref, not writeLock, because since pwrite specifies the
+	// offset it is independent from other writes.
+	// Similarly, using the poller doesn't make sense for pwrite.
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	var nn int
+	for {
+		max := len(p)
+		if fd.IsStream && max-nn > maxRW {
+			max = nn + maxRW
+		}
+		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
+		if err == syscall.EINTR {
+			continue
+		}
+		if n > 0 {
+			nn += n
+		}
+		if nn == len(p) {
+			return nn, err
+		}
+		if err != nil {
+			return nn, err
+		}
+		if n == 0 {
+			return nn, io.ErrUnexpectedEOF
+		}
+	}
+}
+
+// WriteTo wraps the sendto network call.
+func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, err
+	}
+	for {
+		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return 0, err
+		}
+		return len(p), nil
+	}
+}
+
+// WriteMsg wraps the sendmsg network call.
+func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, 0, err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return 0, 0, err
+	}
+	for {
+		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
+		if err == syscall.EINTR {
+			continue
+		}
+		if err == syscall.EAGAIN && fd.pd.pollable() {
+			if err = fd.pd.waitWrite(fd.isFile); err == nil {
+				continue
+			}
+		}
+		if err != nil {
+			return n, 0, err
+		}
+		return n, len(oob), err
+	}
+}
+
+// Accept wraps the accept network call.
+func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
+	if err := fd.readLock(); err != nil {
+		return -1, nil, "", err
+	}
+	defer fd.readUnlock()
+
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return -1, nil, "", err
+	}
+	for {
+		s, rsa, errcall, err := accept(fd.Sysfd)
+		if err == nil {
+			return s, rsa, "", err
+		}
+		switch err {
+		case syscall.EINTR:
+			continue
+		case syscall.EAGAIN:
+			if fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		case syscall.ECONNABORTED:
+			// This means that a socket on the listen
+			// queue was closed before we Accept()ed it;
+			// it's a silly error, so try again.
+			continue
+		}
+		return -1, nil, errcall, err
+	}
+}
+
+// Seek wraps syscall.Seek.
+func (fd *FD) Seek(offset int64, whence int) (int64, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	return syscall.Seek(fd.Sysfd, offset, whence)
+}
+
+// ReadDirent wraps syscall.ReadDirent.
+// We treat this like an ordinary system call rather than a call
+// that tries to fill the buffer.
+func (fd *FD) ReadDirent(buf []byte) (int, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	for {
+		n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
+		if err != nil {
+			n = 0
+			if err == syscall.EAGAIN && fd.pd.pollable() {
+				if err = fd.pd.waitRead(fd.isFile); err == nil {
+					continue
+				}
+			}
+		}
+		// Do not call eofError; caller does not expect to see io.EOF.
+		return n, err
+	}
+}
+
+// Fchmod wraps syscall.Fchmod.
+func (fd *FD) Fchmod(mode uint32) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fchmod(fd.Sysfd, mode)
+	})
+}
+
+// Fchdir wraps syscall.Fchdir.
+func (fd *FD) Fchdir() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Fchdir(fd.Sysfd)
+}
+
+// Fstat wraps syscall.Fstat.
+func (fd *FD) Fstat(s *syscall.Stat_t) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return ignoringEINTR(func() error {
+		return syscall.Fstat(fd.Sysfd, s)
+	})
+}
+
+// tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
+// If the kernel doesn't support it, this is set to 0.
+var tryDupCloexec = int32(1)
+
+// DupCloseOnExec dups fd and marks it close-on-exec.
+func DupCloseOnExec(fd int) (int, string, error) {
+	if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 {
+		r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
+		if e1 == nil {
+			return r0, "", nil
+		}
+		switch e1.(syscall.Errno) {
+		case syscall.EINVAL, syscall.ENOSYS:
+			// Old kernel, or js/wasm (which returns
+			// ENOSYS). Fall back to the portable way from
+			// now on.
+			atomic.StoreInt32(&tryDupCloexec, 0)
+		default:
+			return -1, "fcntl", e1
+		}
+	}
+	return dupCloseOnExecOld(fd)
+}
+
+// dupCloseOnExecOld is the traditional way to dup an fd and
+// set its O_CLOEXEC bit, using two system calls.
+func dupCloseOnExecOld(fd int) (int, string, error) {
+	syscall.ForkLock.RLock()
+	defer syscall.ForkLock.RUnlock()
+	newfd, err := syscall.Dup(fd)
+	if err != nil {
+		return -1, "dup", err
+	}
+	syscall.CloseOnExec(newfd)
+	return newfd, "", nil
+}
+
+// Dup duplicates the file descriptor.
+func (fd *FD) Dup() (int, string, error) {
+	if err := fd.incref(); err != nil {
+		return -1, "", err
+	}
+	defer fd.decref()
+	return DupCloseOnExec(fd.Sysfd)
+}
+
+// On Unix variants only, expose the IO event for the net code.
+
+// WaitWrite waits until data can be written to fd.
+func (fd *FD) WaitWrite() error {
+	return fd.pd.waitWrite(fd.isFile)
+}
+
+// WriteOnce is for testing only. It makes a single write call.
+func (fd *FD) WriteOnce(p []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
+}
+
+// RawRead invokes the user-defined function f for a read operation.
+func (fd *FD) RawRead(f func(uintptr) bool) error {
+	if err := fd.readLock(); err != nil {
+		return err
+	}
+	defer fd.readUnlock()
+	if err := fd.pd.prepareRead(fd.isFile); err != nil {
+		return err
+	}
+	for {
+		if f(uintptr(fd.Sysfd)) {
+			return nil
+		}
+		if err := fd.pd.waitRead(fd.isFile); err != nil {
+			return err
+		}
+	}
+}
+
+// RawWrite invokes the user-defined function f for a write operation.
+func (fd *FD) RawWrite(f func(uintptr) bool) error {
+	if err := fd.writeLock(); err != nil {
+		return err
+	}
+	defer fd.writeUnlock()
+	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
+		return err
+	}
+	for {
+		if f(uintptr(fd.Sysfd)) {
+			return nil
+		}
+		if err := fd.pd.waitWrite(fd.isFile); err != nil {
+			return err
+		}
+	}
+}
+
+// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
+func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
+	for {
+		n, err := fn(fd, p)
+		if err != syscall.EINTR {
+			return n, err
+		}
+	}
+}
diff --git a/src/internal/poll/fd_windows.go b/src/internal/poll/fd_windows.go
new file mode 100644
index 0000000..4a51695
--- /dev/null
+++ b/src/internal/poll/fd_windows.go
@@ -0,0 +1,1068 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"errors"
+	"internal/race"
+	"internal/syscall/windows"
+	"io"
+	"sync"
+	"syscall"
+	"unicode/utf16"
+	"unicode/utf8"
+	"unsafe"
+)
+
+var (
+	initErr error
+	ioSync  uint64
+)
+
+// This package uses the SetFileCompletionNotificationModes Windows
+// API to skip calling GetQueuedCompletionStatus if an IO operation
+// completes synchronously. There is a known bug where
+// SetFileCompletionNotificationModes crashes on some systems (see
+// https://support.microsoft.com/kb/2568167 for details).
+
+var useSetFileCompletionNotificationModes bool // determines if SetFileCompletionNotificationModes is present and safe to use
+
+// checkSetFileCompletionNotificationModes verifies that
+// SetFileCompletionNotificationModes Windows API is present
+// on the system and is safe to use.
+// See https://support.microsoft.com/kb/2568167 for details.
+func checkSetFileCompletionNotificationModes() {
+	err := syscall.LoadSetFileCompletionNotificationModes()
+	if err != nil {
+		return
+	}
+	protos := [2]int32{syscall.IPPROTO_TCP, 0}
+	var buf [32]syscall.WSAProtocolInfo
+	len := uint32(unsafe.Sizeof(buf))
+	n, err := syscall.WSAEnumProtocols(&protos[0], &buf[0], &len)
+	if err != nil {
+		return
+	}
+	for i := int32(0); i < n; i++ {
+		if buf[i].ServiceFlags1&syscall.XP1_IFS_HANDLES == 0 {
+			return
+		}
+	}
+	useSetFileCompletionNotificationModes = true
+}
+
+func init() {
+	var d syscall.WSAData
+	e := syscall.WSAStartup(uint32(0x202), &d)
+	if e != nil {
+		initErr = e
+	}
+	checkSetFileCompletionNotificationModes()
+}
+
+// operation contains a superset of the data necessary to perform all async IO.
+type operation struct {
+	// Used by IOCP interface, it must be the first field
+	// of the struct, as our code relies on it.
+	o syscall.Overlapped
+
+	// fields used by runtime.netpoll
+	runtimeCtx uintptr
+	mode       int32
+	errno      int32
+	qty        uint32
+
+	// fields used only by net package
+	fd     *FD
+	buf    syscall.WSABuf
+	msg    windows.WSAMsg
+	sa     syscall.Sockaddr
+	rsa    *syscall.RawSockaddrAny
+	rsan   int32
+	handle syscall.Handle
+	flags  uint32
+	bufs   []syscall.WSABuf
+}
+
+func (o *operation) InitBuf(buf []byte) {
+	o.buf.Len = uint32(len(buf))
+	o.buf.Buf = nil
+	if len(buf) != 0 {
+		o.buf.Buf = &buf[0]
+	}
+}
+
+func (o *operation) InitBufs(buf *[][]byte) {
+	if o.bufs == nil {
+		o.bufs = make([]syscall.WSABuf, 0, len(*buf))
+	} else {
+		o.bufs = o.bufs[:0]
+	}
+	for _, b := range *buf {
+		if len(b) == 0 {
+			o.bufs = append(o.bufs, syscall.WSABuf{})
+			continue
+		}
+		for len(b) > maxRW {
+			o.bufs = append(o.bufs, syscall.WSABuf{Len: maxRW, Buf: &b[0]})
+			b = b[maxRW:]
+		}
+		if len(b) > 0 {
+			o.bufs = append(o.bufs, syscall.WSABuf{Len: uint32(len(b)), Buf: &b[0]})
+		}
+	}
+}
+
+// ClearBufs clears all pointers to Buffers parameter captured
+// by InitBufs, so it can be released by garbage collector.
+func (o *operation) ClearBufs() {
+	for i := range o.bufs {
+		o.bufs[i].Buf = nil
+	}
+	o.bufs = o.bufs[:0]
+}
+
+func (o *operation) InitMsg(p []byte, oob []byte) {
+	o.InitBuf(p)
+	o.msg.Buffers = &o.buf
+	o.msg.BufferCount = 1
+
+	o.msg.Name = nil
+	o.msg.Namelen = 0
+
+	o.msg.Flags = 0
+	o.msg.Control.Len = uint32(len(oob))
+	o.msg.Control.Buf = nil
+	if len(oob) != 0 {
+		o.msg.Control.Buf = &oob[0]
+	}
+}
+
+// execIO executes a single IO operation o. It submits and cancels
+// IO in the current thread for systems where Windows CancelIoEx API
+// is available. Alternatively, it passes the request onto
+// runtime netpoll and waits for completion or cancels request.
+func execIO(o *operation, submit func(o *operation) error) (int, error) {
+	if o.fd.pd.runtimeCtx == 0 {
+		return 0, errors.New("internal error: polling on unsupported descriptor type")
+	}
+
+	fd := o.fd
+	// Notify runtime netpoll about starting IO.
+	err := fd.pd.prepare(int(o.mode), fd.isFile)
+	if err != nil {
+		return 0, err
+	}
+	// Start IO.
+	err = submit(o)
+	switch err {
+	case nil:
+		// IO completed immediately
+		if o.fd.skipSyncNotif {
+			// No completion message will follow, so return immediately.
+			return int(o.qty), nil
+		}
+		// Need to get our completion message anyway.
+	case syscall.ERROR_IO_PENDING:
+		// IO started, and we have to wait for its completion.
+		err = nil
+	default:
+		return 0, err
+	}
+	// Wait for our request to complete.
+	err = fd.pd.wait(int(o.mode), fd.isFile)
+	if err == nil {
+		// All is good. Extract our IO results and return.
+		if o.errno != 0 {
+			err = syscall.Errno(o.errno)
+			// More data available. Return back the size of received data.
+			if err == syscall.ERROR_MORE_DATA || err == windows.WSAEMSGSIZE {
+				return int(o.qty), err
+			}
+			return 0, err
+		}
+		return int(o.qty), nil
+	}
+	// IO is interrupted by "close" or "timeout"
+	netpollErr := err
+	switch netpollErr {
+	case ErrNetClosing, ErrFileClosing, ErrDeadlineExceeded:
+		// will deal with those.
+	default:
+		panic("unexpected runtime.netpoll error: " + netpollErr.Error())
+	}
+	// Cancel our request.
+	err = syscall.CancelIoEx(fd.Sysfd, &o.o)
+	// Assuming ERROR_NOT_FOUND is returned, if IO is completed.
+	if err != nil && err != syscall.ERROR_NOT_FOUND {
+		// TODO(brainman): maybe do something else, but panic.
+		panic(err)
+	}
+	// Wait for cancellation to complete.
+	fd.pd.waitCanceled(int(o.mode))
+	if o.errno != 0 {
+		err = syscall.Errno(o.errno)
+		if err == syscall.ERROR_OPERATION_ABORTED { // IO Canceled
+			err = netpollErr
+		}
+		return 0, err
+	}
+	// We issued a cancellation request. But, it seems, IO operation succeeded
+	// before the cancellation request ran. We need to treat the IO operation as
+	// succeeded (the bytes are actually sent to/received from the network).
+	return int(o.qty), nil
+}
+
+// FD is a file descriptor. The net and os packages embed this type in
+// a larger type representing a network connection or OS file.
+type FD struct {
+	// Lock sysfd and serialize access to Read and Write methods.
+	fdmu fdMutex
+
+	// System file descriptor. Immutable until Close.
+	Sysfd syscall.Handle
+
+	// Read operation.
+	rop operation
+	// Write operation.
+	wop operation
+
+	// I/O poller.
+	pd pollDesc
+
+	// Used to implement pread/pwrite.
+	l sync.Mutex
+
+	// For console I/O.
+	lastbits       []byte   // first few bytes of the last incomplete rune in last write
+	readuint16     []uint16 // buffer to hold uint16s obtained with ReadConsole
+	readbyte       []byte   // buffer to hold decoding of readuint16 from utf16 to utf8
+	readbyteOffset int      // readbyte[readbyteOffset:] is yet to be consumed with file.Read
+
+	// Semaphore signaled when file is closed.
+	csema uint32
+
+	skipSyncNotif bool
+
+	// Whether this is a streaming descriptor, as opposed to a
+	// packet-based descriptor like a UDP socket.
+	IsStream bool
+
+	// Whether a zero byte read indicates EOF. This is false for a
+	// message based socket connection.
+	ZeroReadIsEOF bool
+
+	// Whether this is a file rather than a network socket.
+	isFile bool
+
+	// The kind of this file.
+	kind fileKind
+}
+
+// fileKind describes the kind of file.
+type fileKind byte
+
+const (
+	kindNet fileKind = iota
+	kindFile
+	kindConsole
+	kindDir
+	kindPipe
+)
+
+// logInitFD is set by tests to enable file descriptor initialization logging.
+var logInitFD func(net string, fd *FD, err error)
+
+// Init initializes the FD. The Sysfd field should already be set.
+// This can be called multiple times on a single FD.
+// The net argument is a network name from the net package (e.g., "tcp"),
+// or "file" or "console" or "dir" or "pipe".
+// Set pollable to true if fd should be managed by runtime netpoll.
+func (fd *FD) Init(net string, pollable bool) (string, error) {
+	if initErr != nil {
+		return "", initErr
+	}
+
+	switch net {
+	case "file":
+		fd.kind = kindFile
+	case "console":
+		fd.kind = kindConsole
+	case "dir":
+		fd.kind = kindDir
+	case "pipe":
+		fd.kind = kindPipe
+	case "tcp", "tcp4", "tcp6",
+		"udp", "udp4", "udp6",
+		"ip", "ip4", "ip6",
+		"unix", "unixgram", "unixpacket":
+		fd.kind = kindNet
+	default:
+		return "", errors.New("internal error: unknown network type " + net)
+	}
+	fd.isFile = fd.kind != kindNet
+
+	var err error
+	if pollable {
+		// Only call init for a network socket.
+		// This means that we don't add files to the runtime poller.
+		// Adding files to the runtime poller can confuse matters
+		// if the user is doing their own overlapped I/O.
+		// See issue #21172.
+		//
+		// In general the code below avoids calling the execIO
+		// function for non-network sockets. If some method does
+		// somehow call execIO, then execIO, and therefore the
+		// calling method, will return an error, because
+		// fd.pd.runtimeCtx will be 0.
+		err = fd.pd.init(fd)
+	}
+	if logInitFD != nil {
+		logInitFD(net, fd, err)
+	}
+	if err != nil {
+		return "", err
+	}
+	if pollable && useSetFileCompletionNotificationModes {
+		// We do not use events, so we can skip them always.
+		flags := uint8(syscall.FILE_SKIP_SET_EVENT_ON_HANDLE)
+		// It's not safe to skip completion notifications for UDP:
+		// https://docs.microsoft.com/en-us/archive/blogs/winserverperformance/designing-applications-for-high-performance-part-iii
+		if net == "tcp" {
+			flags |= syscall.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS
+		}
+		err := syscall.SetFileCompletionNotificationModes(fd.Sysfd, flags)
+		if err == nil && flags&syscall.FILE_SKIP_COMPLETION_PORT_ON_SUCCESS != 0 {
+			fd.skipSyncNotif = true
+		}
+	}
+	// Disable SIO_UDP_CONNRESET behavior.
+	// http://support.microsoft.com/kb/263823
+	switch net {
+	case "udp", "udp4", "udp6":
+		ret := uint32(0)
+		flag := uint32(0)
+		size := uint32(unsafe.Sizeof(flag))
+		err := syscall.WSAIoctl(fd.Sysfd, syscall.SIO_UDP_CONNRESET, (*byte)(unsafe.Pointer(&flag)), size, nil, 0, &ret, nil, 0)
+		if err != nil {
+			return "wsaioctl", err
+		}
+	}
+	fd.rop.mode = 'r'
+	fd.wop.mode = 'w'
+	fd.rop.fd = fd
+	fd.wop.fd = fd
+	fd.rop.runtimeCtx = fd.pd.runtimeCtx
+	fd.wop.runtimeCtx = fd.pd.runtimeCtx
+	return "", nil
+}
+
+func (fd *FD) destroy() error {
+	if fd.Sysfd == syscall.InvalidHandle {
+		return syscall.EINVAL
+	}
+	// Poller may want to unregister fd in readiness notification mechanism,
+	// so this must be executed before fd.CloseFunc.
+	fd.pd.close()
+	var err error
+	switch fd.kind {
+	case kindNet:
+		// The net package uses the CloseFunc variable for testing.
+		err = CloseFunc(fd.Sysfd)
+	case kindDir:
+		err = syscall.FindClose(fd.Sysfd)
+	default:
+		err = syscall.CloseHandle(fd.Sysfd)
+	}
+	fd.Sysfd = syscall.InvalidHandle
+	runtime_Semrelease(&fd.csema)
+	return err
+}
+
+// Close closes the FD. The underlying file descriptor is closed by
+// the destroy method when there are no remaining references.
+func (fd *FD) Close() error {
+	if !fd.fdmu.increfAndClose() {
+		return errClosing(fd.isFile)
+	}
+	if fd.kind == kindPipe {
+		syscall.CancelIoEx(fd.Sysfd, nil)
+	}
+	// unblock pending reader and writer
+	fd.pd.evict()
+	err := fd.decref()
+	// Wait until the descriptor is closed. If this was the only
+	// reference, it is already closed.
+	runtime_Semacquire(&fd.csema)
+	return err
+}
+
+// Windows ReadFile and WSARecv use DWORD (uint32) parameter to pass buffer length.
+// This prevents us reading blocks larger than 4GB.
+// See golang.org/issue/26923.
+const maxRW = 1 << 30 // 1GB is large enough and keeps subsequent reads aligned
+
+// Read implements io.Reader. A single call reads at most maxRW bytes.
+func (fd *FD) Read(buf []byte) (int, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, err
+	}
+	defer fd.readUnlock()
+
+	if len(buf) > maxRW {
+		buf = buf[:maxRW]
+	}
+
+	var n int
+	var err error
+	if fd.isFile {
+		fd.l.Lock()
+		defer fd.l.Unlock()
+		switch fd.kind {
+		case kindConsole:
+			n, err = fd.readConsole(buf)
+		default:
+			n, err = syscall.Read(fd.Sysfd, buf)
+			if fd.kind == kindPipe && err == syscall.ERROR_OPERATION_ABORTED {
+				// Close uses CancelIoEx to interrupt concurrent I/O for pipes.
+				// If the fd is a pipe and the Read was interrupted by CancelIoEx,
+				// we assume it is interrupted by Close.
+				err = ErrFileClosing
+			}
+		}
+		if err != nil {
+			n = 0 // report no bytes when the read failed
+		}
+	} else {
+		o := &fd.rop
+		o.InitBuf(buf)
+		n, err = execIO(o, func(o *operation) error {
+			return syscall.WSARecv(o.fd.Sysfd, &o.buf, 1, &o.qty, &o.flags, &o.o, nil)
+		})
+		if race.Enabled {
+			race.Acquire(unsafe.Pointer(&ioSync))
+		}
+	}
+	if len(buf) != 0 {
+		err = fd.eofError(n, err) // not applied to zero-length reads
+	}
+	return n, err
+}
+
+var ReadConsole = syscall.ReadConsole // a variable so that tests can substitute it
+
+// readConsole reads utf16 characters from console File,
+// encodes them into utf8 and stores them in buffer b.
+// It returns the number of utf8 bytes read and an error, if any.
+func (fd *FD) readConsole(b []byte) (int, error) {
+	if len(b) == 0 {
+		return 0, nil
+	}
+
+	if fd.readuint16 == nil {
+		// Note: syscall.ReadConsole fails for very large buffers.
+		// The limit is somewhere around (but not exactly) 16384.
+		// Stay well below.
+		fd.readuint16 = make([]uint16, 0, 10000)
+		fd.readbyte = make([]byte, 0, 4*cap(fd.readuint16))
+	}
+
+	for fd.readbyteOffset >= len(fd.readbyte) {
+		n := cap(fd.readuint16) - len(fd.readuint16)
+		if n > len(b) {
+			n = len(b)
+		}
+		var nw uint32
+		err := ReadConsole(fd.Sysfd, &fd.readuint16[:len(fd.readuint16)+1][len(fd.readuint16)], uint32(n), &nw, nil)
+		if err != nil {
+			return 0, err
+		}
+		uint16s := fd.readuint16[:len(fd.readuint16)+int(nw)]
+		fd.readuint16 = fd.readuint16[:0] // emptied here; a dangling half surrogate is put back below
+		buf := fd.readbyte[:0]
+		for i := 0; i < len(uint16s); i++ {
+			r := rune(uint16s[i])
+			if utf16.IsSurrogate(r) {
+				if i+1 == len(uint16s) {
+					if nw > 0 {
+						// Save half surrogate pair for next time.
+						fd.readuint16 = fd.readuint16[:1]
+						fd.readuint16[0] = uint16(r)
+						break
+					}
+					r = utf8.RuneError
+				} else {
+					r = utf16.DecodeRune(r, rune(uint16s[i+1]))
+					if r != utf8.RuneError {
+						i++
+					}
+				}
+			}
+			n := utf8.EncodeRune(buf[len(buf):cap(buf)], r)
+			buf = buf[:len(buf)+n]
+		}
+		fd.readbyte = buf
+		fd.readbyteOffset = 0
+		if nw == 0 {
+			break
+		}
+	}
+
+	src := fd.readbyte[fd.readbyteOffset:]
+	var i int
+	for i = 0; i < len(src) && i < len(b); i++ {
+		x := src[i]
+		if x == 0x1A { // Ctrl-Z
+			if i == 0 {
+				fd.readbyteOffset++ // skip a leading Ctrl-Z so it is not seen again
+			}
+			break
+		}
+		b[i] = x
+	}
+	fd.readbyteOffset += i
+	return i, nil
+}
+
+// Pread emulates the Unix pread system call: it reads into b at offset off and puts the file position back afterwards.
+func (fd *FD) Pread(b []byte, off int64) (int, error) {
+	// Call incref, not readLock, because since pread specifies the
+	// offset it is independent from other reads.
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+
+	if len(b) > maxRW {
+		b = b[:maxRW]
+	}
+
+	fd.l.Lock()
+	defer fd.l.Unlock()
+	curoffset, e := syscall.Seek(fd.Sysfd, 0, io.SeekCurrent)
+	if e != nil {
+		return 0, e
+	}
+	defer syscall.Seek(fd.Sysfd, curoffset, io.SeekStart) // restore the saved position on return; the result is not checked
+	o := syscall.Overlapped{
+		OffsetHigh: uint32(off >> 32),
+		Offset:     uint32(off),
+	}
+	var done uint32
+	e = syscall.ReadFile(fd.Sysfd, b, &done, &o)
+	if e != nil {
+		done = 0 // report no bytes when the read failed
+		if e == syscall.ERROR_HANDLE_EOF {
+			e = io.EOF
+		}
+	}
+	if len(b) != 0 {
+		e = fd.eofError(int(done), e)
+	}
+	return int(done), e
+}
+
+// ReadFrom wraps the recvfrom network call. It returns the number of bytes read and the address reported for the sender.
+func (fd *FD) ReadFrom(buf []byte) (int, syscall.Sockaddr, error) {
+	if len(buf) == 0 {
+		return 0, nil, nil
+	}
+	if len(buf) > maxRW {
+		buf = buf[:maxRW]
+	}
+	if err := fd.readLock(); err != nil {
+		return 0, nil, err
+	}
+	defer fd.readUnlock()
+	o := &fd.rop
+	o.InitBuf(buf)
+	n, err := execIO(o, func(o *operation) error {
+		if o.rsa == nil {
+			o.rsa = new(syscall.RawSockaddrAny)
+		}
+		o.rsan = int32(unsafe.Sizeof(*o.rsa))
+		return syscall.WSARecvFrom(o.fd.Sysfd, &o.buf, 1, &o.qty, &o.flags, o.rsa, &o.rsan, &o.o, nil)
+	})
+	err = fd.eofError(n, err)
+	if err != nil {
+		return n, nil, err
+	}
+	sa, _ := o.rsa.Sockaddr() // the conversion error is discarded
+	return n, sa, nil
+}
+
+// Write implements io.Writer. The buffer is written in chunks of at most maxRW bytes.
+func (fd *FD) Write(buf []byte) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if fd.isFile {
+		fd.l.Lock()
+		defer fd.l.Unlock()
+	}
+
+	ntotal := 0
+	for len(buf) > 0 {
+		b := buf
+		if len(b) > maxRW {
+			b = b[:maxRW]
+		}
+		var n int
+		var err error
+		if fd.isFile {
+			switch fd.kind {
+			case kindConsole:
+				n, err = fd.writeConsole(b)
+			default:
+				n, err = syscall.Write(fd.Sysfd, b)
+				if fd.kind == kindPipe && err == syscall.ERROR_OPERATION_ABORTED {
+					// Close uses CancelIoEx to interrupt concurrent I/O for pipes.
+					// If the fd is a pipe and the Write was interrupted by CancelIoEx,
+					// we assume it is interrupted by Close.
+					err = ErrFileClosing
+				}
+			}
+			if err != nil {
+				n = 0 // count nothing for a failed chunk
+			}
+		} else {
+			if race.Enabled {
+				race.ReleaseMerge(unsafe.Pointer(&ioSync))
+			}
+			o := &fd.wop
+			o.InitBuf(b)
+			n, err = execIO(o, func(o *operation) error {
+				return syscall.WSASend(o.fd.Sysfd, &o.buf, 1, &o.qty, 0, &o.o, nil)
+			})
+		}
+		ntotal += n
+		if err != nil {
+			return ntotal, err
+		}
+		buf = buf[n:] // advance past what was written
+	}
+	return ntotal, nil
+}
+
+// writeConsole writes len(b) bytes to the console File.
+// It returns the number of bytes written and an error, if any.
+func (fd *FD) writeConsole(b []byte) (int, error) {
+	n := len(b)
+	runes := make([]rune, 0, 256)
+	if len(fd.lastbits) > 0 {
+		b = append(fd.lastbits, b...) // prepend bytes held back by the previous call
+		fd.lastbits = nil
+
+	}
+	for len(b) >= utf8.UTFMax || utf8.FullRune(b) {
+		r, l := utf8.DecodeRune(b)
+		runes = append(runes, r)
+		b = b[l:]
+	}
+	if len(b) > 0 {
+		fd.lastbits = make([]byte, len(b)) // hold back an incomplete trailing rune
+		copy(fd.lastbits, b)
+	}
+	// syscall.WriteConsole seems to fail, if given large buffer.
+	// So limit the buffer to 16000 characters. This number was
+	// discovered by experimenting with syscall.WriteConsole.
+	const maxWrite = 16000
+	for len(runes) > 0 {
+		m := len(runes)
+		if m > maxWrite {
+			m = maxWrite
+		}
+		chunk := runes[:m]
+		runes = runes[m:]
+		uint16s := utf16.Encode(chunk)
+		for len(uint16s) > 0 {
+			var written uint32
+			err := syscall.WriteConsole(fd.Sysfd, &uint16s[0], uint32(len(uint16s)), &written, nil)
+			if err != nil {
+				return 0, err
+			}
+			uint16s = uint16s[written:]
+		}
+	}
+	return n, nil // n is the original len(b), including any bytes held back
+}
+
+// Pwrite emulates the Unix pwrite system call: it writes buf at offset off and puts the file position back afterwards.
+func (fd *FD) Pwrite(buf []byte, off int64) (int, error) {
+	// Call incref, not writeLock, because since pwrite specifies the
+	// offset it is independent from other writes.
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+
+	fd.l.Lock()
+	defer fd.l.Unlock()
+	curoffset, e := syscall.Seek(fd.Sysfd, 0, io.SeekCurrent)
+	if e != nil {
+		return 0, e
+	}
+	defer syscall.Seek(fd.Sysfd, curoffset, io.SeekStart) // restore the saved position on return; the result is not checked
+
+	ntotal := 0
+	for len(buf) > 0 {
+		b := buf
+		if len(b) > maxRW {
+			b = b[:maxRW]
+		}
+		var n uint32
+		o := syscall.Overlapped{
+			OffsetHigh: uint32(off >> 32),
+			Offset:     uint32(off),
+		}
+		e = syscall.WriteFile(fd.Sysfd, b, &n, &o)
+		ntotal += int(n)
+		if e != nil {
+			return ntotal, e
+		}
+		buf = buf[n:]
+		off += int64(n) // the next chunk continues where this one ended
+	}
+	return ntotal, nil
+}
+
+// Writev emulates the Unix writev system call by passing all buffers to a single WSASend.
+func (fd *FD) Writev(buf *[][]byte) (int64, error) {
+	if len(*buf) == 0 {
+		return 0, nil
+	}
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+	if race.Enabled {
+		race.ReleaseMerge(unsafe.Pointer(&ioSync))
+	}
+	o := &fd.wop
+	o.InitBufs(buf)
+	n, err := execIO(o, func(o *operation) error {
+		return syscall.WSASend(o.fd.Sysfd, &o.bufs[0], uint32(len(o.bufs)), &o.qty, 0, &o.o, nil)
+	})
+	o.ClearBufs()
+	TestHookDidWritev(n) // test hook; receives the byte count of this call
+	consume(buf, int64(n))
+	return int64(n), err
+}
+
+// WriteTo wraps the sendto network call, sending buf to sa in chunks of at most maxRW bytes.
+func (fd *FD) WriteTo(buf []byte, sa syscall.Sockaddr) (int, error) {
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+
+	if len(buf) == 0 {
+		// A zero-byte payload is still sent, as a single call.
+		o := &fd.wop
+		o.InitBuf(buf)
+		o.sa = sa
+		n, err := execIO(o, func(o *operation) error {
+			return syscall.WSASendto(o.fd.Sysfd, &o.buf, 1, &o.qty, 0, o.sa, &o.o, nil)
+		})
+		return n, err
+	}
+
+	ntotal := 0
+	for len(buf) > 0 {
+		b := buf
+		if len(b) > maxRW {
+			b = b[:maxRW]
+		}
+		o := &fd.wop
+		o.InitBuf(b)
+		o.sa = sa
+		n, err := execIO(o, func(o *operation) error {
+			return syscall.WSASendto(o.fd.Sysfd, &o.buf, 1, &o.qty, 0, o.sa, &o.o, nil)
+		})
+		ntotal += int(n)
+		if err != nil {
+			return ntotal, err
+		}
+		buf = buf[n:] // advance past what was sent
+	}
+	return ntotal, nil
+}
+
+// ConnectEx calls ConnectExFunc for ra. This doesn't need any locking, since it is only
+// called when the descriptor is first created. This is here rather
+// than in the net package so that it can use fd.wop.
+func (fd *FD) ConnectEx(ra syscall.Sockaddr) error {
+	o := &fd.wop
+	o.sa = ra
+	_, err := execIO(o, func(o *operation) error {
+		return ConnectExFunc(o.fd.Sysfd, o.sa, nil, 0, nil, &o.o)
+	})
+	return err
+}
+
+func (fd *FD) acceptOne(s syscall.Handle, rawsa []syscall.RawSockaddrAny, o *operation) (string, error) {
+	// Submit accept request. On any failure below s is closed and the
+	// name of the failing call is returned alongside the error.
+	o.handle = s
+	o.rsan = int32(unsafe.Sizeof(rawsa[0]))
+	_, err := execIO(o, func(o *operation) error {
+		return AcceptFunc(o.fd.Sysfd, o.handle, (*byte)(unsafe.Pointer(&rawsa[0])), 0, uint32(o.rsan), uint32(o.rsan), &o.qty, &o.o)
+	})
+	if err != nil {
+		CloseFunc(s)
+		return "acceptex", err
+	}
+	// Inherit properties of the listening socket.
+	err = syscall.Setsockopt(s, syscall.SOL_SOCKET, syscall.SO_UPDATE_ACCEPT_CONTEXT, (*byte)(unsafe.Pointer(&fd.Sysfd)), int32(unsafe.Sizeof(fd.Sysfd)))
+	if err != nil {
+		CloseFunc(s)
+		return "setsockopt", err
+	}
+
+	return "", nil
+}
+
+// Accept handles accepting a socket. The sysSocket parameter is used
+// to allocate the net socket.
+func (fd *FD) Accept(sysSocket func() (syscall.Handle, error)) (syscall.Handle, []syscall.RawSockaddrAny, uint32, string, error) {
+	if err := fd.readLock(); err != nil {
+		return syscall.InvalidHandle, nil, 0, "", err
+	}
+	defer fd.readUnlock()
+
+	o := &fd.rop
+	var rawsa [2]syscall.RawSockaddrAny // NOTE(review): presumably the local and remote addresses — confirm against AcceptEx
+	for {
+		s, err := sysSocket()
+		if err != nil {
+			return syscall.InvalidHandle, nil, 0, "", err
+		}
+
+		errcall, err := fd.acceptOne(s, rawsa[:], o)
+		if err == nil {
+			return s, rawsa[:], uint32(o.rsan), "", nil
+		}
+
+		// Sometimes we see WSAECONNRESET and ERROR_NETNAME_DELETED is
+		// returned here. These happen if connection reset is received
+		// before AcceptEx could complete. These errors relate to new
+		// connection, not to AcceptEx, so ignore broken connection and
+		// try AcceptEx again for more connections.
+		errno, ok := err.(syscall.Errno)
+		if !ok {
+			return syscall.InvalidHandle, nil, 0, errcall, err
+		}
+		switch errno {
+		case syscall.ERROR_NETNAME_DELETED, syscall.WSAECONNRESET:
+			// Ignore these and try again with a freshly allocated socket.
+		default:
+			return syscall.InvalidHandle, nil, 0, errcall, err
+		}
+	}
+}
+
+// Seek wraps syscall.Seek, holding a reference to fd and the lock fd.l for the duration of the call.
+func (fd *FD) Seek(offset int64, whence int) (int64, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+
+	fd.l.Lock()
+	defer fd.l.Unlock()
+
+	return syscall.Seek(fd.Sysfd, offset, whence)
+}
+
+// FindNextFile wraps syscall.FindNextFile, holding a reference to fd for the duration of the call.
+func (fd *FD) FindNextFile(data *syscall.Win32finddata) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.FindNextFile(fd.Sysfd, data)
+}
+
+// Fchmod clears the file's read-only attribute when mode has syscall.S_IWRITE set and sets it otherwise.
+func (fd *FD) Fchmod(mode uint32) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+
+	var d syscall.ByHandleFileInformation
+	if err := syscall.GetFileInformationByHandle(fd.Sysfd, &d); err != nil {
+		return err
+	}
+	attrs := d.FileAttributes
+	if mode&syscall.S_IWRITE != 0 {
+		attrs &^= syscall.FILE_ATTRIBUTE_READONLY
+	} else {
+		attrs |= syscall.FILE_ATTRIBUTE_READONLY
+	}
+	if attrs == d.FileAttributes {
+		return nil // attributes already match; nothing to update
+	}
+
+	var du windows.FILE_BASIC_INFO
+	du.FileAttributes = attrs
+	// The length must describe du, the buffer actually passed, not d.
+	return windows.SetFileInformationByHandle(fd.Sysfd, windows.FileBasicInfo, uintptr(unsafe.Pointer(&du)), uint32(unsafe.Sizeof(du)))
+}
+
+// Fchdir wraps syscall.Fchdir, holding a reference to fd for the duration of the call.
+func (fd *FD) Fchdir() error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Fchdir(fd.Sysfd)
+}
+
+// GetFileType wraps syscall.GetFileType, holding a reference to fd for the duration of the call.
+func (fd *FD) GetFileType() (uint32, error) {
+	if err := fd.incref(); err != nil {
+		return 0, err
+	}
+	defer fd.decref()
+	return syscall.GetFileType(fd.Sysfd)
+}
+
+// GetFileInformationByHandle wraps syscall.GetFileInformationByHandle, filling data for fd's handle.
+func (fd *FD) GetFileInformationByHandle(data *syscall.ByHandleFileInformation) error {
+	if err := fd.incref(); err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.GetFileInformationByHandle(fd.Sysfd, data)
+}
+
+// RawRead invokes the user-defined function f for a read operation, repeating until f returns true.
+func (fd *FD) RawRead(f func(uintptr) bool) error {
+	if err := fd.readLock(); err != nil {
+		return err
+	}
+	defer fd.readUnlock()
+	for {
+		if f(uintptr(fd.Sysfd)) {
+			return nil
+		}
+
+		// Use a zero-byte read as a way to get notified when this
+		// socket is readable. h/t https://stackoverflow.com/a/42019668/332798
+		o := &fd.rop
+		o.InitBuf(nil)
+		if !fd.IsStream {
+			o.flags |= windows.MSG_PEEK
+		}
+		_, err := execIO(o, func(o *operation) error {
+			return syscall.WSARecv(o.fd.Sysfd, &o.buf, 1, &o.qty, &o.flags, &o.o, nil)
+		})
+		if err == windows.WSAEMSGSIZE {
+			// Expected with a 0-byte peek; ignore and call f again.
+		} else if err != nil {
+			return err
+		}
+	}
+}
+
+// RawWrite invokes the user-defined function f for a write operation. f is called exactly once.
+func (fd *FD) RawWrite(f func(uintptr) bool) error {
+	if err := fd.writeLock(); err != nil {
+		return err
+	}
+	defer fd.writeUnlock()
+
+	if f(uintptr(fd.Sysfd)) {
+		return nil
+	}
+
+	// TODO(tmm1): find a way to detect socket writability
+	return syscall.EWINDOWS // f returned false and there is no way to wait for writability yet
+}
+
+// sockaddrToRaw converts sa into the raw sockaddr layout for its address
+// family. It returns a pointer to the raw structure, its size in bytes,
+// and syscall.EWINDOWS when sa is neither an IPv4 nor an IPv6 address.
+func sockaddrToRaw(sa syscall.Sockaddr) (unsafe.Pointer, int32, error) {
+	switch addr := sa.(type) {
+	case *syscall.SockaddrInet4:
+		raw4 := syscall.RawSockaddrInet4{Family: syscall.AF_INET}
+		// The port is stored high byte first.
+		port := (*[2]byte)(unsafe.Pointer(&raw4.Port))
+		port[0], port[1] = byte(addr.Port>>8), byte(addr.Port)
+		copy(raw4.Addr[:], addr.Addr[:])
+		return unsafe.Pointer(&raw4), int32(unsafe.Sizeof(raw4)), nil
+	case *syscall.SockaddrInet6:
+		raw6 := syscall.RawSockaddrInet6{
+			Family:   syscall.AF_INET6,
+			Scope_id: addr.ZoneId,
+		}
+		// The port is stored high byte first.
+		port := (*[2]byte)(unsafe.Pointer(&raw6.Port))
+		port[0], port[1] = byte(addr.Port>>8), byte(addr.Port)
+		copy(raw6.Addr[:], addr.Addr[:])
+		return unsafe.Pointer(&raw6), int32(unsafe.Sizeof(raw6)), nil
+	default:
+		// Any other address type is rejected.
+		return nil, 0, syscall.EWINDOWS
+	}
+}
+
+// ReadMsg wraps the WSARecvMsg network call. It returns the bytes read into p, the control length, the message flags and the source address.
+func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
+	if err := fd.readLock(); err != nil {
+		return 0, 0, 0, nil, err
+	}
+	defer fd.readUnlock()
+
+	if len(p) > maxRW {
+		p = p[:maxRW]
+	}
+
+	o := &fd.rop
+	o.InitMsg(p, oob)
+	o.rsa = new(syscall.RawSockaddrAny)
+	o.msg.Name = (syscall.Pointer)(unsafe.Pointer(o.rsa))
+	o.msg.Namelen = int32(unsafe.Sizeof(*o.rsa))
+	o.msg.Flags = uint32(flags)
+	n, err := execIO(o, func(o *operation) error {
+		return windows.WSARecvMsg(o.fd.Sysfd, &o.msg, &o.qty, &o.o, nil)
+	})
+	err = fd.eofError(n, err)
+	var sa syscall.Sockaddr
+	if err == nil {
+		sa, err = o.rsa.Sockaddr() // a conversion failure is returned as the call's error
+	}
+	return n, int(o.msg.Control.Len), int(o.msg.Flags), sa, err
+}
+
+// WriteMsg wraps the WSASendMsg network call. Unlike the other writes here, a payload above maxRW is rejected, not split.
+func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
+	if len(p) > maxRW {
+		return 0, 0, errors.New("packet is too large (only 1GB is allowed)")
+	}
+
+	if err := fd.writeLock(); err != nil {
+		return 0, 0, err
+	}
+	defer fd.writeUnlock()
+
+	o := &fd.wop
+	o.InitMsg(p, oob)
+	if sa != nil {
+		rsa, len, err := sockaddrToRaw(sa) // NOTE: len shadows the builtin inside this block
+		if err != nil {
+			return 0, 0, err
+		}
+		o.msg.Name = (syscall.Pointer)(rsa)
+		o.msg.Namelen = len
+	}
+	n, err := execIO(o, func(o *operation) error {
+		return windows.WSASendMsg(o.fd.Sysfd, &o.msg, 0, &o.qty, &o.o, nil)
+	})
+	return n, int(o.msg.Control.Len), err
+}
diff --git a/src/internal/poll/fd_windows_test.go b/src/internal/poll/fd_windows_test.go
new file mode 100644
index 0000000..e3ca0e2
--- /dev/null
+++ b/src/internal/poll/fd_windows_test.go
@@ -0,0 +1,111 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"fmt"
+	"internal/poll"
+	"os"
+	"sync"
+	"syscall"
+	"testing"
+)
+
+type loggedFD struct {
+	Net string   // the net argument passed to logFD
+	FD  *poll.FD // the descriptor passed to logFD
+	Err error    // the err argument passed to logFD
+}
+
+var (
+	logMu     sync.Mutex                   // guards loggedFDs
+	loggedFDs map[syscall.Handle]*loggedFD // records keyed by FD.Sysfd
+)
+
+func logFD(net string, fd *poll.FD, err error) {
+	logMu.Lock()
+	defer logMu.Unlock()
+
+	loggedFDs[fd.Sysfd] = &loggedFD{ // a later call for the same handle replaces the earlier record
+		Net: net,
+		FD:  fd,
+		Err: err,
+	}
+}
+
+func init() {
+	loggedFDs = make(map[syscall.Handle]*loggedFD)
+	*poll.LogInitFD = logFD // NOTE(review): presumably invoked by poll when an FD is initialised — confirm where LogInitFD is declared
+}
+
+func findLoggedFD(h syscall.Handle) (lfd *loggedFD, found bool) {
+	// Look the handle up while holding logMu.
+	logMu.Lock()
+	entry, ok := loggedFDs[h]
+	logMu.Unlock()
+	return entry, ok
+}
+
+// checkFileIsNotPartOfNetpoll returns an error if f was never logged or
+// if its logged FD is part of netpoll; otherwise it returns nil.
+func checkFileIsNotPartOfNetpoll(f *os.File) error {
+	entry, ok := findLoggedFD(syscall.Handle(f.Fd()))
+	switch {
+	case !ok:
+		return fmt.Errorf("%v fd=%v: is not found in the log", f.Name(), f.Fd())
+	case entry.FD.IsPartOfNetpoll():
+		return fmt.Errorf("%v fd=%v: is part of netpoll, but should not be (logged: net=%v err=%v)", f.Name(), f.Fd(), entry.Net, entry.Err)
+	}
+	return nil
+}
+
+func TestFileFdsAreInitialised(t *testing.T) {
+	// Use the running test binary as the file to open.
+	path, err := os.Executable()
+	if err != nil {
+		t.Fatal(err)
+	}
+	file, err := os.Open(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer file.Close()
+
+	if err := checkFileIsNotPartOfNetpoll(file); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestSerialFdsAreInitialised(t *testing.T) {
+	for _, name := range []string{"COM1", "COM2", "COM3", "COM4"} {
+		t.Run(name, func(t *testing.T) {
+			h, err := syscall.CreateFile(syscall.StringToUTF16Ptr(name),
+				syscall.GENERIC_READ|syscall.GENERIC_WRITE,
+				0,
+				nil,
+				syscall.OPEN_EXISTING,
+				syscall.FILE_ATTRIBUTE_NORMAL|syscall.FILE_FLAG_OVERLAPPED,
+				0)
+			if err != nil {
+				if errno, ok := err.(syscall.Errno); ok {
+					switch errno {
+					case syscall.ERROR_FILE_NOT_FOUND,
+						syscall.ERROR_ACCESS_DENIED:
+						t.Log("Skipping: ", err) // port missing or inaccessible: the subtest ends without failing
+						return
+					}
+				}
+				t.Fatal(err)
+			}
+			f := os.NewFile(uintptr(h), name)
+			defer f.Close()
+
+			err = checkFileIsNotPartOfNetpoll(f)
+			if err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}
diff --git a/src/internal/poll/fd_writev_darwin.go b/src/internal/poll/fd_writev_darwin.go
new file mode 100644
index 0000000..805fa2c
--- /dev/null
+++ b/src/internal/poll/fd_writev_darwin.go
@@ -0,0 +1,17 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin
+// +build darwin
+
+package poll
+
+import (
+	"syscall"
+	_ "unsafe" // for go:linkname
+)
+
+// writev is implemented in syscall/syscall_darwin.go and bound here through the linkname directive below.
+//go:linkname writev syscall.writev
+func writev(fd int, iovecs []syscall.Iovec) (uintptr, error)
diff --git a/src/internal/poll/fd_writev_illumos.go b/src/internal/poll/fd_writev_illumos.go
new file mode 100644
index 0000000..a0b11ed
--- /dev/null
+++ b/src/internal/poll/fd_writev_illumos.go
@@ -0,0 +1,17 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build illumos
+// +build illumos
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"syscall"
+)
+
+func writev(fd int, iovecs []syscall.Iovec) (uintptr, error) {
+	return unix.Writev(fd, iovecs) // delegates to internal/syscall/unix unchanged
+}
diff --git a/src/internal/poll/fd_writev_unix.go b/src/internal/poll/fd_writev_unix.go
new file mode 100644
index 0000000..87f284a
--- /dev/null
+++ b/src/internal/poll/fd_writev_unix.go
@@ -0,0 +1,30 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || linux || netbsd || openbsd
+// +build dragonfly freebsd linux netbsd openbsd
+
+package poll
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+func writev(fd int, iovecs []syscall.Iovec) (uintptr, error) {
+	var (
+		r uintptr
+		e syscall.Errno
+	)
+	for {
+		r, _, e = syscall.Syscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(len(iovecs)))
+		if e != syscall.EINTR {
+			break
+		}
+	}
+	if e != 0 {
+		return r, e
+	}
+	return r, nil
+}
diff --git a/src/internal/poll/hook_cloexec.go b/src/internal/poll/hook_cloexec.go
new file mode 100644
index 0000000..d519f60
--- /dev/null
+++ b/src/internal/poll/hook_cloexec.go
@@ -0,0 +1,13 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || illumos || linux || netbsd || openbsd
+// +build dragonfly freebsd illumos linux netbsd openbsd
+
+package poll
+
+import "syscall"
+
+// Accept4Func is used to hook the accept4 call; it defaults to syscall.Accept4.
+var Accept4Func func(int, int) (int, syscall.Sockaddr, error) = syscall.Accept4
diff --git a/src/internal/poll/hook_unix.go b/src/internal/poll/hook_unix.go
new file mode 100644
index 0000000..c88d65c
--- /dev/null
+++ b/src/internal/poll/hook_unix.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris
+// +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris
+
+package poll
+
+import "syscall"
+
+// CloseFunc is used to hook the close call; it defaults to syscall.Close.
+var CloseFunc func(int) error = syscall.Close
+
+// AcceptFunc is used to hook the accept call; it defaults to syscall.Accept.
+var AcceptFunc func(int) (int, syscall.Sockaddr, error) = syscall.Accept
diff --git a/src/internal/poll/hook_windows.go b/src/internal/poll/hook_windows.go
new file mode 100644
index 0000000..0bd950e
--- /dev/null
+++ b/src/internal/poll/hook_windows.go
@@ -0,0 +1,16 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// CloseFunc is used to hook the close call; it defaults to syscall.Closesocket.
+var CloseFunc func(syscall.Handle) error = syscall.Closesocket
+
+// AcceptFunc is used to hook the accept call; it defaults to syscall.AcceptEx.
+var AcceptFunc func(syscall.Handle, syscall.Handle, *byte, uint32, uint32, uint32, *uint32, *syscall.Overlapped) error = syscall.AcceptEx
+
+// ConnectExFunc is used to hook the ConnectEx call; it defaults to syscall.ConnectEx.
+var ConnectExFunc func(syscall.Handle, syscall.Sockaddr, *byte, uint32, *uint32, *syscall.Overlapped) error = syscall.ConnectEx
diff --git a/src/internal/poll/iovec_illumos.go b/src/internal/poll/iovec_illumos.go
new file mode 100644
index 0000000..f4058b2
--- /dev/null
+++ b/src/internal/poll/iovec_illumos.go
@@ -0,0 +1,17 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build illumos
+// +build illumos
+
+package poll
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+func newIovecWithBase(base *byte) syscall.Iovec {
+	return syscall.Iovec{Base: (*int8)(unsafe.Pointer(base))} // Base is a *int8 here, so base is converted; no length is set
+}
diff --git a/src/internal/poll/iovec_unix.go b/src/internal/poll/iovec_unix.go
new file mode 100644
index 0000000..6fd5d86
--- /dev/null
+++ b/src/internal/poll/iovec_unix.go
@@ -0,0 +1,14 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+package poll
+
+import "syscall"
+
+func newIovecWithBase(base *byte) syscall.Iovec {
+	return syscall.Iovec{Base: base} // only Base is set; the length is left at zero
+}
diff --git a/src/internal/poll/read_test.go b/src/internal/poll/read_test.go
new file mode 100644
index 0000000..598a52e
--- /dev/null
+++ b/src/internal/poll/read_test.go
@@ -0,0 +1,61 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"os"
+	"runtime"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestRead(t *testing.T) {
+	t.Run("SpecialFile", func(t *testing.T) {
+		var wg sync.WaitGroup
+		for _, p := range specialFiles() {
+			for i := 0; i < 4; i++ { // four concurrent readers per file
+				wg.Add(1)
+				go func(p string) {
+					defer wg.Done()
+					for i := 0; i < 100; i++ {
+						if _, err := os.ReadFile(p); err != nil {
+							t.Error(err) // stop this reader at its first failed read
+							return
+						}
+						time.Sleep(time.Nanosecond)
+					}
+				}(p)
+			}
+		}
+		wg.Wait()
+	})
+}
+
+// specialFiles returns the special files known for the current GOOS,
+// keeping only those that can be opened on this machine. The result
+// is nil for an operating system with no known candidates.
+func specialFiles() []string {
+	var candidates []string
+	switch runtime.GOOS {
+	case "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd":
+		candidates = []string{"/dev/null"}
+	case "linux":
+		candidates = []string{
+			"/dev/null",
+			"/proc/stat",
+			"/sys/devices/system/cpu/online",
+		}
+	}
+	// Filter in place, keeping only the paths that open successfully.
+	usable := candidates[:0]
+	for _, path := range candidates {
+		if f, err := os.Open(path); err == nil {
+			f.Close()
+			usable = append(usable, path)
+		}
+	}
+	return usable
+}
diff --git a/src/internal/poll/sendfile_bsd.go b/src/internal/poll/sendfile_bsd.go
new file mode 100644
index 0000000..3ba30a2
--- /dev/null
+++ b/src/internal/poll/sendfile_bsd.go
@@ -0,0 +1,60 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd
+// +build dragonfly freebsd
+
+package poll
+
+import "syscall"
+
+// maxSendfileSize is the largest chunk size we ask the kernel to copy
+// at a time.
+const maxSendfileSize int = 4 << 20 // 4 MiB
+
+// SendFile wraps the sendfile system call, copying up to remain bytes from src at offset pos to dstFD; it returns the bytes written.
+func SendFile(dstFD *FD, src int, pos, remain int64) (int64, error) {
+	if err := dstFD.writeLock(); err != nil {
+		return 0, err
+	}
+	defer dstFD.writeUnlock()
+	if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
+		return 0, err
+	}
+
+	dst := dstFD.Sysfd
+	var written int64
+	var err error
+	for remain > 0 {
+		n := maxSendfileSize
+		if int64(n) > remain {
+			n = int(remain)
+		}
+		pos1 := pos
+		n, err1 := syscall.Sendfile(dst, src, &pos1, n)
+		if n > 0 {
+			pos += int64(n)
+			written += int64(n)
+			remain -= int64(n)
+		} else if n == 0 && err1 == nil {
+			break // nothing copied and no error: stop
+		}
+		if err1 == syscall.EINTR {
+			continue
+		}
+		if err1 == syscall.EAGAIN {
+			if err1 = dstFD.pd.waitWrite(dstFD.isFile); err1 == nil {
+				continue
+			}
+		}
+		if err1 != nil {
+			// This includes syscall.ENOSYS (no kernel
+			// support) and syscall.EINVAL (fd types which
+			// don't implement sendfile)
+			err = err1
+			break
+		}
+	}
+	return written, err
+}
diff --git a/src/internal/poll/sendfile_linux.go b/src/internal/poll/sendfile_linux.go
new file mode 100644
index 0000000..6e78523
--- /dev/null
+++ b/src/internal/poll/sendfile_linux.go
@@ -0,0 +1,55 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// maxSendfileSize is the largest chunk size we ask the kernel to copy
+// at a time.
+const maxSendfileSize int = 4 << 20 // 4 MiB
+
+// SendFile wraps the sendfile system call, copying up to remain bytes from src to dstFD; it returns the bytes written.
+func SendFile(dstFD *FD, src int, remain int64) (int64, error) {
+	if err := dstFD.writeLock(); err != nil {
+		return 0, err
+	}
+	defer dstFD.writeUnlock()
+	if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
+		return 0, err
+	}
+
+	dst := dstFD.Sysfd
+	var written int64
+	var err error
+	for remain > 0 {
+		n := maxSendfileSize
+		if int64(n) > remain {
+			n = int(remain)
+		}
+		n, err1 := syscall.Sendfile(dst, src, nil, n)
+		if n > 0 {
+			written += int64(n)
+			remain -= int64(n)
+		} else if n == 0 && err1 == nil {
+			break // nothing copied and no error: stop
+		}
+		if err1 == syscall.EINTR {
+			continue
+		}
+		if err1 == syscall.EAGAIN {
+			if err1 = dstFD.pd.waitWrite(dstFD.isFile); err1 == nil {
+				continue
+			}
+		}
+		if err1 != nil {
+			// This includes syscall.ENOSYS (no kernel
+			// support) and syscall.EINVAL (fd types which
+			// don't implement sendfile)
+			err = err1
+			break
+		}
+	}
+	return written, err
+}
diff --git a/src/internal/poll/sendfile_solaris.go b/src/internal/poll/sendfile_solaris.go
new file mode 100644
index 0000000..0a88430
--- /dev/null
+++ b/src/internal/poll/sendfile_solaris.go
@@ -0,0 +1,65 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// Not strictly needed, but very helpful for debugging, see issue #10221.
+//go:cgo_import_dynamic _ _ "libsendfile.so"
+//go:cgo_import_dynamic _ _ "libsocket.so"
+
+// maxSendfileSize is the largest chunk size we ask the kernel to copy
+// at a time.
+const maxSendfileSize int = 4 << 20 // 4 MiB
+
+// SendFile wraps the sendfile system call, copying up to remain bytes from src at offset pos to dstFD; it returns the bytes written.
+func SendFile(dstFD *FD, src int, pos, remain int64) (int64, error) {
+	if err := dstFD.writeLock(); err != nil {
+		return 0, err
+	}
+	defer dstFD.writeUnlock()
+	if err := dstFD.pd.prepareWrite(dstFD.isFile); err != nil {
+		return 0, err
+	}
+
+	dst := dstFD.Sysfd
+	var written int64
+	var err error
+	for remain > 0 {
+		n := maxSendfileSize
+		if int64(n) > remain {
+			n = int(remain)
+		}
+		pos1 := pos
+		n, err1 := syscall.Sendfile(dst, src, &pos1, n)
+		if err1 == syscall.EAGAIN || err1 == syscall.EINTR {
+			// A partial write may have occurred; derive the count from how far pos1 moved.
+			n = int(pos1 - pos)
+		}
+		if n > 0 {
+			pos += int64(n)
+			written += int64(n)
+			remain -= int64(n)
+		} else if n == 0 && err1 == nil {
+			break // nothing copied and no error: stop
+		}
+		if err1 == syscall.EAGAIN {
+			if err1 = dstFD.pd.waitWrite(dstFD.isFile); err1 == nil {
+				continue
+			}
+		}
+		if err1 == syscall.EINTR {
+			continue
+		}
+		if err1 != nil {
+			// This includes syscall.ENOSYS (no kernel
+			// support) and syscall.EINVAL (fd types which
+			// don't implement sendfile)
+			err = err1
+			break
+		}
+	}
+	return written, err
+}
diff --git a/src/internal/poll/sendfile_windows.go b/src/internal/poll/sendfile_windows.go
new file mode 100644
index 0000000..50c3ee8
--- /dev/null
+++ b/src/internal/poll/sendfile_windows.go
@@ -0,0 +1,81 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"io"
+	"syscall"
+)
+
+// SendFile wraps the TransmitFile call, sending n bytes of src from its current position; n <= 0 means up to the end of the file.
+func SendFile(fd *FD, src syscall.Handle, n int64) (written int64, err error) {
+	if fd.kind == kindPipe {
+		// TransmitFile does not work with pipes
+		return 0, syscall.ESPIPE
+	}
+
+	if err := fd.writeLock(); err != nil {
+		return 0, err
+	}
+	defer fd.writeUnlock()
+
+	o := &fd.wop
+	o.handle = src
+
+	// TODO(brainman): skip calling syscall.Seek if OS allows it
+	curpos, err := syscall.Seek(o.handle, 0, io.SeekCurrent)
+	if err != nil {
+		return 0, err
+	}
+
+	if n <= 0 { // We don't know the size of the file so infer it.
+		// Find the number of bytes offset from curpos until the end of the file.
+		n, err = syscall.Seek(o.handle, -curpos, io.SeekEnd)
+		if err != nil {
+			return
+		}
+		// Now seek back to the original position.
+		if _, err = syscall.Seek(o.handle, curpos, io.SeekStart); err != nil {
+			return
+		}
+	}
+
+	// TransmitFile can be invoked in one call with at most
+	// 2,147,483,646 bytes: the maximum value for a 32-bit integer minus 1.
+	// See https://docs.microsoft.com/en-us/windows/win32/api/mswsock/nf-mswsock-transmitfile
+	const maxChunkSizePerCall = int64(0x7fffffff - 1)
+
+	for n > 0 {
+		chunkSize := maxChunkSizePerCall
+		if chunkSize > n {
+			chunkSize = n
+		}
+
+		o.qty = uint32(chunkSize)
+		o.o.Offset = uint32(curpos)
+		o.o.OffsetHigh = uint32(curpos >> 32)
+
+		nw, err := execIO(o, func(o *operation) error { // this err shadows the named result inside the loop
+			return syscall.TransmitFile(o.fd.Sysfd, o.handle, o.qty, 0, &o.o, nil, syscall.TF_WRITE_BEHIND)
+		})
+		if err != nil {
+			return written, err
+		}
+
+		curpos += int64(nw)
+
+		// Some versions of Windows (Windows 10 1803) do not set
+		// file position after TransmitFile completes.
+		// So just use Seek to set file position.
+		if _, err = syscall.Seek(o.handle, curpos, io.SeekStart); err != nil {
+			return written, err
+		}
+
+		n -= int64(nw)
+		written += int64(nw)
+	}
+
+	return // returns the named results
+}
diff --git a/src/internal/poll/sock_cloexec.go b/src/internal/poll/sock_cloexec.go
new file mode 100644
index 0000000..b303829
--- /dev/null
+++ b/src/internal/poll/sock_cloexec.go
@@ -0,0 +1,51 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements accept for platforms that provide a fast path for
+// setting SetNonblock and CloseOnExec.
+
+//go:build dragonfly || freebsd || illumos || linux || netbsd || openbsd
+// +build dragonfly freebsd illumos linux netbsd openbsd
+
+package poll
+
+import "syscall"
+
+// accept wraps the accept system call and marks the returned file
+// descriptor as nonblocking and close-on-exec. The string result names the
+// call that failed when the error is non-nil.
+func accept(s int) (int, syscall.Sockaddr, string, error) {
+	ns, sa, err := Accept4Func(s, syscall.SOCK_NONBLOCK|syscall.SOCK_CLOEXEC)
+	if err == nil {
+		return ns, sa, "", nil
+	}
+	// On Linux the accept4 system call was introduced in 2.6.28
+	// kernel and on FreeBSD it was introduced in 10 kernel. ENOSYS means
+	// the syscall is missing; some Linux systems report EINVAL, EACCES or
+	// EFAULT instead of ENOSYS. Only those four errors fall back to using
+	// accept; every other error is reported as an accept4 failure.
+	if err != syscall.ENOSYS && err != syscall.EINVAL && err != syscall.EACCES && err != syscall.EFAULT {
+		return -1, sa, "accept4", err
+	}
+
+	// See ../syscall/exec_unix.go for description of ForkLock.
+	// It is probably okay to hold the lock across syscall.Accept
+	// because we have put fd.sysfd into non-blocking mode.
+	// However, a call to the File method will put it back into
+	// blocking mode. We can't take that risk, so no use of ForkLock here.
+	ns, sa, err = AcceptFunc(s)
+	if err != nil {
+		return -1, nil, "accept", err
+	}
+	syscall.CloseOnExec(ns)
+	if err = syscall.SetNonblock(ns, true); err != nil {
+		CloseFunc(ns)
+		return -1, nil, "setnonblock", err
+	}
+	return ns, sa, "", nil
+}
diff --git a/src/internal/poll/sockopt.go b/src/internal/poll/sockopt.go
new file mode 100644
index 0000000..4f2e2fb
--- /dev/null
+++ b/src/internal/poll/sockopt.go
@@ -0,0 +1,37 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows
+
+package poll
+
+import "syscall"
+
+// SetsockoptInt wraps the setsockopt network call with an int argument.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptInt(level, name, arg int) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptInt(fd.Sysfd, level, name, arg)
+}
+
+// SetsockoptInet4Addr wraps the setsockopt network call with an IPv4 address.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptInet4Addr(level, name int, arg [4]byte) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptInet4Addr(fd.Sysfd, level, name, arg)
+}
+
+// SetsockoptLinger wraps the setsockopt network call with a Linger argument.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptLinger(level, name int, l *syscall.Linger) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptLinger(fd.Sysfd, level, name, l)
+}
diff --git a/src/internal/poll/sockopt_linux.go b/src/internal/poll/sockopt_linux.go
new file mode 100644
index 0000000..bc79c35
--- /dev/null
+++ b/src/internal/poll/sockopt_linux.go
@@ -0,0 +1,16 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// SetsockoptIPMreqn wraps the setsockopt network call with an IPMreqn argument.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptIPMreqn(level, name int, mreq *syscall.IPMreqn) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptIPMreqn(fd.Sysfd, level, name, mreq)
+}
diff --git a/src/internal/poll/sockopt_unix.go b/src/internal/poll/sockopt_unix.go
new file mode 100644
index 0000000..4fb9600
--- /dev/null
+++ b/src/internal/poll/sockopt_unix.go
@@ -0,0 +1,19 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package poll
+
+import "syscall"
+
+// SetsockoptByte wraps the setsockopt network call with a byte argument.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptByte(level, name int, arg byte) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptByte(fd.Sysfd, level, name, arg)
+}
diff --git a/src/internal/poll/sockopt_windows.go b/src/internal/poll/sockopt_windows.go
new file mode 100644
index 0000000..dd5fb70
--- /dev/null
+++ b/src/internal/poll/sockopt_windows.go
@@ -0,0 +1,25 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import "syscall"
+
+// Setsockopt wraps the setsockopt network call, passing the option value as
+// a raw pointer and length. The call is bracketed by fd.incref and a
+// deferred fd.decref; if incref fails, its error is returned instead.
+func (fd *FD) Setsockopt(level, optname int32, optval *byte, optlen int32) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.Setsockopt(fd.Sysfd, level, optname, optval, optlen)
+}
+
+// WSAIoctl wraps the WSAIoctl network call, forwarding every argument
+// unchanged along with fd.Sysfd. The call is bracketed by fd.incref and a
+// deferred fd.decref; if incref fails, its error is returned instead.
+func (fd *FD) WSAIoctl(iocc uint32, inbuf *byte, cbif uint32, outbuf *byte, cbob uint32, cbbr *uint32, overlapped *syscall.Overlapped, completionRoutine uintptr) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.WSAIoctl(fd.Sysfd, iocc, inbuf, cbif, outbuf, cbob, cbbr, overlapped, completionRoutine)
+}
diff --git a/src/internal/poll/sockoptip.go b/src/internal/poll/sockoptip.go
new file mode 100644
index 0000000..d86c4c1
--- /dev/null
+++ b/src/internal/poll/sockoptip.go
@@ -0,0 +1,28 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || windows
+// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris windows
+
+package poll
+
+import "syscall"
+
+// SetsockoptIPMreq wraps the setsockopt network call with an IPMreq argument.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptIPMreq(level, name int, mreq *syscall.IPMreq) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptIPMreq(fd.Sysfd, level, name, mreq)
+}
+
+// SetsockoptIPv6Mreq wraps the setsockopt network call with an IPv6Mreq argument.
+// The call is bracketed by fd.incref and a deferred fd.decref; if incref
+// fails, its error is returned and setsockopt is not attempted.
+func (fd *FD) SetsockoptIPv6Mreq(level, name int, mreq *syscall.IPv6Mreq) error {
+	err := fd.incref()
+	if err != nil {
+		return err
+	}
+	defer fd.decref()
+	return syscall.SetsockoptIPv6Mreq(fd.Sysfd, level, name, mreq)
+}
diff --git a/src/internal/poll/splice_linux.go b/src/internal/poll/splice_linux.go
new file mode 100644
index 0000000..8062d98
--- /dev/null
+++ b/src/internal/poll/splice_linux.go
@@ -0,0 +1,242 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll
+
+import (
+	"internal/syscall/unix"
+	"runtime"
+	"sync"
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+)
+
+const (
+	// spliceNonblock is the flags value passed to splice(2) to make calls non-blocking.
+	spliceNonblock = 0x2
+
+	// maxSpliceSize is the upper bound, in bytes (4 MiB), on the amount of
+	// data Splice asks the kernel to move in a single call to splice(2).
+	maxSpliceSize = 4 << 20
+)
+
+// Splice moves up to remain bytes of data from src to dst with the splice
+// system call, which minimizes copies of data from and to userspace.
+//
+// A pipe obtained from getPipe (pooled, or newly created) serves as the
+// intermediate buffer; it is handed to putPipe when Splice returns.
+// src and dst must both be stream-oriented sockets.
+//
+// If err != nil, sc is the system call which caused the error.
+func Splice(dst, src *FD, remain int64) (written int64, handled bool, sc string, err error) {
+	pipe, sc, err := getPipe()
+	if err != nil {
+		return 0, false, sc, err
+	}
+	defer putPipe(pipe)
+
+	for remain > 0 {
+		// Never request more than maxSpliceSize, nor more than is left.
+		chunk := maxSpliceSize
+		if remain < int64(chunk) {
+			chunk = int(remain)
+		}
+
+		var drained int
+		drained, err = spliceDrain(pipe.wfd, src, chunk)
+		// The operation is considered handled if splice returns no
+		// error, or an error other than EINVAL. An EINVAL means the
+		// kernel does not support splice for the socket type of src.
+		// The failed syscall does not consume any data so it is safe
+		// to fall back to a generic copy.
+		handled = handled || err != syscall.EINVAL
+		// spliceDrain should never return EAGAIN, so if err != nil,
+		// Splice cannot continue. If drained == 0 && err == nil, src is
+		// at EOF, and the transfer is complete.
+		if err != nil || drained == 0 {
+			break
+		}
+		pipe.data += drained
+
+		var pumped int
+		pumped, err = splicePump(dst, pipe.rfd, drained)
+		if pumped > 0 {
+			written += int64(pumped)
+			remain -= int64(pumped)
+			pipe.data -= pumped
+		}
+		if err != nil {
+			break
+		}
+	}
+	if err != nil {
+		return written, handled, "splice", err
+	}
+	return written, true, "", nil
+}
+
+// spliceDrain moves data from a socket to a pipe.
+//
+// Invariant: the pipe is empty on entry, either because it is in its
+// initial state or because splicePump emptied it previously.
+//
+// The pipe can therefore be assumed ready for writing, so an EAGAIN from
+// splice must mean the socket is not ready for reading; spliceDrain then
+// waits for read readiness and tries again. EINTR is retried immediately.
+//
+// If spliceDrain returns (0, nil), src is at EOF.
+func spliceDrain(pipefd int, sock *FD, max int) (int, error) {
+	if lockErr := sock.readLock(); lockErr != nil {
+		return 0, lockErr
+	}
+	defer sock.readUnlock()
+	if prepErr := sock.pd.prepareRead(sock.isFile); prepErr != nil {
+		return 0, prepErr
+	}
+	for {
+		n, err := splice(pipefd, sock.Sysfd, max, spliceNonblock)
+		switch err {
+		case syscall.EINTR:
+			// Interrupted: issue the same splice again.
+		case syscall.EAGAIN:
+			if waitErr := sock.pd.waitRead(sock.isFile); waitErr != nil {
+				return n, waitErr
+			}
+		default:
+			// Success (including EOF) or a non-retryable error.
+			return n, err
+		}
+	}
+}
+
+// splicePump moves all the buffered data from a pipe to a socket.
+//
+// Invariant: on entry the pipe holds exactly inPipe bytes of data, left
+// there by a previous call to spliceDrain.
+//
+// By analogy to the condition from spliceDrain, splicePump only needs to
+// poll the socket for readiness if splice returns EAGAIN.
+//
+// When a single splice(2) call does not move everything, splicePump keeps
+// looping until all inPipe bytes have been written to the socket, much like
+// the Write step of an io.Copy in userspace. It returns the number of bytes
+// moved so far together with the first error that stops it.
+func splicePump(sock *FD, pipefd int, inPipe int) (int, error) {
+	if lockErr := sock.writeLock(); lockErr != nil {
+		return 0, lockErr
+	}
+	defer sock.writeUnlock()
+	if prepErr := sock.pd.prepareWrite(sock.isFile); prepErr != nil {
+		return 0, prepErr
+	}
+	total := 0
+	for inPipe > 0 {
+		moved, err := splice(sock.Sysfd, pipefd, inPipe, spliceNonblock)
+		// Here, the condition moved == 0 && err == nil should never be
+		// observed, since Splice controls the write side of the pipe.
+		if moved <= 0 {
+			if err != syscall.EAGAIN {
+				return total, err
+			}
+			if waitErr := sock.pd.waitWrite(sock.isFile); waitErr != nil {
+				return total, waitErr
+			}
+			continue
+		}
+		inPipe -= moved
+		total += moved
+	}
+	return total, nil
+}
+
+// splice wraps the splice system call, moving up to max bytes from in to
+// out. The current implementation only uses splice on sockets and pipes, so
+// both offset arguments are passed as nil. The count is returned as int
+// rather than int64 because callers never ask for more data in a single
+// call than can fit in an int32.
+func splice(out int, in int, max int, flags int) (int, error) {
+	moved, err := syscall.Splice(in, nil, out, nil, max, flags)
+	return int(moved), err
+}
+
+type splicePipe struct {
+	rfd  int // first descriptor returned by Pipe2; Splice pumps data out of the pipe through it
+	wfd  int // second descriptor returned by Pipe2; Splice drains the source socket into it
+	data int // bytes drained into the pipe but not yet pumped out, as tracked by Splice
+}
+
+// splicePipePool caches pipes to avoid high-frequency construction and destruction of pipe buffers.
+// The garbage collector frees the pipes held in the sync.Pool periodically, so newPoolPipe registers
+// destroyPipe as a finalizer on each pipe to close its file descriptors before it is collected.
+var splicePipePool = sync.Pool{New: newPoolPipe}
+
+// newPoolPipe is the New function of splicePipePool. It returns a fresh
+// pipe with destroyPipe registered as its finalizer, or an untyped nil
+// when newPipe cannot provide one.
+func newPoolPipe() interface{} {
+	// The reason for a pipe-creation failure is discarded here: a nil result
+	// redirects the data transmission to the conventional read() + write()
+	// path as a fallback.
+	if p := newPipe(); p != nil {
+		runtime.SetFinalizer(p, destroyPipe)
+		return p
+	}
+	return nil
+}
+
+// getPipe acquires a pipe buffer from splicePipePool, which hands out a
+// cached pipe or creates a new one through newPoolPipe.
+//
+// Creating a new pipe may fail, in which case the pool yields nil and
+// getPipe reports the generic error EINVAL together with the system call
+// name "splice".
+func getPipe() (*splicePipe, string, error) {
+	v := splicePipePool.Get()
+	if v != nil {
+		return v.(*splicePipe), "", nil
+	}
+	return nil, "splice", syscall.EINVAL
+}
+
+// putPipe returns p to splicePipePool when it is empty. A pipe that still
+// has data in it is not reusable: its finalizer is cleared and it is
+// closed and discarded instead of being put back into the pool.
+func putPipe(p *splicePipe) {
+	if p.data == 0 {
+		splicePipePool.Put(p)
+		return
+	}
+	runtime.SetFinalizer(p, nil)
+	destroyPipe(p)
+}
+
+var disableSplice unsafe.Pointer // holds a *bool stored by newPipe; true after its F_GETPIPE_SZ probe failed
+
+// newPipe sets up a pipe for a splice operation. It returns nil when splice
+// has been marked disabled, when pipe2 fails, or when the one-time
+// F_GETPIPE_SZ probe fails.
+func newPipe() (sp *splicePipe) {
+	disabled := (*bool)(atomic.LoadPointer(&disableSplice))
+	if disabled != nil && *disabled {
+		return nil
+	}
+
+	// pipe2 was added in 2.6.27 and our minimum requirement is 2.6.23, so it
+	// might not be implemented. Falling back to pipe is possible, but prior to
+	// 2.6.29 splice returns -EAGAIN instead of 0 when the connection is
+	// closed.
+	const flags = syscall.O_CLOEXEC | syscall.O_NONBLOCK
+	var pair [2]int
+	if err := syscall.Pipe2(pair[:], flags); err != nil {
+		return nil
+	}
+
+	sp = &splicePipe{rfd: pair[0], wfd: pair[1]}
+	if disabled != nil {
+		// The probe below has already been recorded (and passed).
+		return sp
+	}
+
+	// Nothing has been recorded yet: run the probe and publish its outcome
+	// when this function returns, whichever way it goes.
+	disabled = new(bool)
+	defer atomic.StorePointer(&disableSplice, unsafe.Pointer(disabled))
+
+	// F_GETPIPE_SZ was added in 2.6.35, which does not have the -EAGAIN bug.
+	_, _, errno := syscall.Syscall(unix.FcntlSyscall, uintptr(pair[0]), syscall.F_GETPIPE_SZ, 0)
+	if errno != 0 {
+		*disabled = true
+		destroyPipe(sp)
+		return nil
+	}
+	return sp
+}
+
+// destroyPipe destroys a pipe by calling CloseFunc on its rfd and then on
+// its wfd. The results of CloseFunc are ignored.
+func destroyPipe(p *splicePipe) {
+	for _, fd := range [...]int{p.rfd, p.wfd} {
+		CloseFunc(fd)
+	}
+}
diff --git a/src/internal/poll/splice_linux_test.go b/src/internal/poll/splice_linux_test.go
new file mode 100644
index 0000000..280468c
--- /dev/null
+++ b/src/internal/poll/splice_linux_test.go
@@ -0,0 +1,119 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"internal/poll"
+	"internal/syscall/unix"
+	"runtime"
+	"syscall"
+	"testing"
+	"time"
+)
+
+// checkPipes reports whether every descriptor in fds has been closed. It
+// returns false as soon as an F_GETPIPE_SZ fcntl succeeds on one of them.
+func checkPipes(fds []int) bool {
+	for i := range fds {
+		if _, _, errno := syscall.Syscall(unix.FcntlSyscall, uintptr(fds[i]), syscall.F_GETPIPE_SZ, 0); errno == 0 {
+			// The fcntl succeeded, so this pipe fd is still open.
+			return false
+		}
+	}
+	return true
+}
+
+// TestSplicePipePool takes N pipes from the pool, puts them all back, drops
+// every reference to them, and then checks that repeated garbage
+// collections eventually close the write descriptor of each pipe.
+func TestSplicePipePool(t *testing.T) {
+	const N = 64
+	var (
+		p     *poll.SplicePipe
+		pipes []*poll.SplicePipe
+		fds   []int
+		err   error
+	)
+	for i := 0; i < N; i++ {
+		if p, _, err = poll.GetPipe(); err != nil {
+			t.Skip("failed to create pipe, skip this test")
+		}
+		_, wfd := poll.GetPipeFds(p)
+		fds = append(fds, wfd)
+		pipes = append(pipes, p)
+	}
+	for _, p = range pipes {
+		poll.PutPipe(p)
+	}
+	// Drop the remaining references held by this test.
+	pipes = nil
+	p = nil
+
+	// Exploit the timeout of "go test" as a timer for the subsequent verification.
+	timeout := 5 * time.Minute
+	if deadline, ok := t.Deadline(); ok {
+		timeout = time.Until(deadline)
+		timeout -= timeout / 10 // Leave 10% headroom for cleanup.
+	}
+	expiry := time.NewTimer(timeout)
+	defer expiry.Stop()
+
+	// Trigger garbage collection repeatedly, waiting for all pipes in sync.Pool
+	// to either be deallocated and closed, or to time out.
+	for {
+		runtime.GC()
+		time.Sleep(10 * time.Millisecond)
+		if checkPipes(fds) {
+			return
+		}
+		select {
+		case <-expiry.C:
+			t.Fatal("at least one pipe is still open")
+		default:
+		}
+	}
+}
+
+// BenchmarkSplicePipe compares a pooled get/put cycle (poll.GetPipe and
+// poll.PutPipe) with creating and destroying a pipe on every iteration
+// (poll.NewPipe and poll.DestroyPipe).
+func BenchmarkSplicePipe(b *testing.B) {
+	withPool := func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			if p, _, err := poll.GetPipe(); err == nil {
+				poll.PutPipe(p)
+			}
+		}
+	}
+	withoutPool := func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			p := poll.NewPipe()
+			if p == nil {
+				b.Skip("newPipe returned nil")
+			}
+			poll.DestroyPipe(p)
+		}
+	}
+	b.Run("SplicePipeWithPool", withPool)
+	b.Run("SplicePipeWithoutPool", withoutPool)
+}
+
+// BenchmarkSplicePipePoolParallel runs the pooled get/put cycle under
+// b.RunParallel. Iterations in which poll.GetPipe fails are skipped over.
+func BenchmarkSplicePipePoolParallel(b *testing.B) {
+	cycle := func(pb *testing.PB) {
+		for pb.Next() {
+			if p, _, err := poll.GetPipe(); err == nil {
+				poll.PutPipe(p)
+			}
+		}
+	}
+	b.RunParallel(cycle)
+}
+
+// BenchmarkSplicePipeNativeParallel creates and destroys a pipe on every
+// iteration under b.RunParallel, without going through the pool.
+func BenchmarkSplicePipeNativeParallel(b *testing.B) {
+	cycle := func(pb *testing.PB) {
+		for pb.Next() {
+			p := poll.NewPipe()
+			if p == nil {
+				b.Skip("newPipe returned nil")
+			}
+			poll.DestroyPipe(p)
+		}
+	}
+	b.RunParallel(cycle)
+}
diff --git a/src/internal/poll/strconv.go b/src/internal/poll/strconv.go
new file mode 100644
index 0000000..c98332d
--- /dev/null
+++ b/src/internal/poll/strconv.go
@@ -0,0 +1,14 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build plan9
+// +build plan9
+
+package poll
+
+// stringsHasSuffix is strings.HasSuffix. It reports whether s ends in
+// suffix; an empty suffix matches every string.
+func stringsHasSuffix(s, suffix string) bool {
+	start := len(s) - len(suffix)
+	if start < 0 {
+		// s is shorter than suffix, so it cannot end in it.
+		return false
+	}
+	return s[start:] == suffix
+}
diff --git a/src/internal/poll/sys_cloexec.go b/src/internal/poll/sys_cloexec.go
new file mode 100644
index 0000000..7e6d422
--- /dev/null
+++ b/src/internal/poll/sys_cloexec.go
@@ -0,0 +1,37 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements accept for platforms that do not provide a fast path for
+// setting SetNonblock and CloseOnExec.
+
+//go:build aix || darwin || (js && wasm) || (solaris && !illumos)
+// +build aix darwin js,wasm solaris,!illumos
+
+package poll
+
+import (
+	"syscall"
+)
+
+// accept wraps the accept system call and marks the returned file
+// descriptor as nonblocking and close-on-exec. The string result names the
+// call that failed when the error is non-nil.
+func accept(s int) (int, syscall.Sockaddr, string, error) {
+	// See ../syscall/exec_unix.go for description of ForkLock.
+	// It is probably okay to hold the lock across syscall.Accept
+	// because we have put fd.sysfd into non-blocking mode.
+	// However, a call to the File method will put it back into
+	// blocking mode. We can't take that risk, so no use of ForkLock here.
+	ns, sa, err := AcceptFunc(s)
+	if err != nil {
+		return -1, nil, "accept", err
+	}
+	syscall.CloseOnExec(ns)
+	if err = syscall.SetNonblock(ns, true); err != nil {
+		// Do not hand back a descriptor that could not be made nonblocking.
+		CloseFunc(ns)
+		return -1, nil, "setnonblock", err
+	}
+	return ns, sa, "", nil
+}
diff --git a/src/internal/poll/writev.go b/src/internal/poll/writev.go
new file mode 100644
index 0000000..824de75
--- /dev/null
+++ b/src/internal/poll/writev.go
@@ -0,0 +1,88 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin || dragonfly || freebsd || illumos || linux || netbsd || openbsd
+// +build darwin dragonfly freebsd illumos linux netbsd openbsd
+
+package poll
+
+import (
+	"io"
+	"syscall"
+)
+
+// Writev wraps the writev system call. It writes the buffers in *v to fd,
+// passing the number of bytes written to consume after every call, and
+// returns the total number of bytes written along with the error, if any,
+// that stopped it.
+func (fd *FD) Writev(v *[][]byte) (int64, error) {
+	if lockErr := fd.writeLock(); lockErr != nil {
+		return 0, lockErr
+	}
+	defer fd.writeUnlock()
+	if prepErr := fd.pd.prepareWrite(fd.isFile); prepErr != nil {
+		return 0, prepErr
+	}
+
+	// Start from the iovec slice cached on fd, if there is one.
+	var iovecs []syscall.Iovec
+	if cached := fd.iovecs; cached != nil {
+		iovecs = *cached
+	}
+	// TODO: read from sysconf(_SC_IOV_MAX)? The Linux default is
+	// 1024 and this seems conservative enough for now. Darwin's
+	// UIO_MAXIOV also seems to be 1024.
+	const maxVec = 1024
+
+	var (
+		total int64
+		err   error
+	)
+	for len(*v) > 0 {
+		// Build one iovec per non-empty buffer, up to maxVec entries.
+		iovecs = iovecs[:0]
+		for _, chunk := range *v {
+			if len(chunk) == 0 {
+				continue
+			}
+			iov := newIovecWithBase(&chunk[0])
+			if fd.IsStream && len(chunk) > 1<<30 {
+				// Cap an oversized buffer on a stream at 1<<30 bytes and
+				// stop collecting.
+				iov.SetLen(1 << 30)
+				iovecs = append(iovecs, iov)
+				break // continue chunk on next writev
+			}
+			iov.SetLen(len(chunk))
+			iovecs = append(iovecs, iov)
+			if len(iovecs) == maxVec {
+				break
+			}
+		}
+		if len(iovecs) == 0 {
+			break
+		}
+		if fd.iovecs == nil {
+			fd.iovecs = new([]syscall.Iovec)
+		}
+		*fd.iovecs = iovecs // cache
+
+		var wrote uintptr
+		wrote, err = writev(fd.Sysfd, iovecs)
+		if wrote == ^uintptr(0) {
+			// An all-ones count is treated as nothing written.
+			wrote = 0
+		}
+		TestHookDidWritev(int(wrote))
+		total += int64(wrote)
+		consume(v, int64(wrote))
+		// Zero the entries so the cached slice holds no stale iovecs.
+		for i := range iovecs {
+			iovecs[i] = syscall.Iovec{}
+		}
+
+		if err == nil {
+			if total == 0 {
+				err = io.ErrUnexpectedEOF
+				break
+			}
+			continue
+		}
+		if err == syscall.EINTR {
+			continue
+		}
+		if err != syscall.EAGAIN {
+			break
+		}
+		// EAGAIN: wait until fd is writable again, then retry.
+		if err = fd.pd.waitWrite(fd.isFile); err != nil {
+			break
+		}
+	}
+	return total, err
+}
diff --git a/src/internal/poll/writev_test.go b/src/internal/poll/writev_test.go
new file mode 100644
index 0000000..b46657c
--- /dev/null
+++ b/src/internal/poll/writev_test.go
@@ -0,0 +1,62 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poll_test
+
+import (
+	"internal/poll"
+	"reflect"
+	"testing"
+)
+
+func TestConsume(t *testing.T) {
+	tests := []struct {
+		in      [][]byte
+		consume int64
+		want    [][]byte
+	}{
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 0,
+			want:    [][]byte{[]byte("foo"), []byte("bar")},
+		},
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 2,
+			want:    [][]byte{[]byte("o"), []byte("bar")},
+		},
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 3,
+			want:    [][]byte{[]byte("bar")},
+		},
+		{
+			in:      [][]byte{[]byte("foo"), []byte("bar")},
+			consume: 4,
+			want:    [][]byte{[]byte("ar")},
+		},
+		{
+			in:      [][]byte{nil, nil, nil, []byte("bar")},
+			consume: 1,
+			want:    [][]byte{[]byte("ar")},
+		},
+		{
+			in:      [][]byte{nil, nil, nil, []byte("foo")},
+			consume: 0,
+			want:    [][]byte{[]byte("foo")},
+		},
+		{
+			in:      [][]byte{nil, nil, nil},
+			consume: 0,
+			want:    [][]byte{},
+		},
+	}
+	for i, tt := range tests {
+		in := tt.in
+		poll.Consume(&in, tt.consume)
+		if !reflect.DeepEqual(in, tt.want) {
+			t.Errorf("%d. after consume(%d) = %+v, want %+v", i, tt.consume, in, tt.want)
+		}
+	}
+}
diff --git a/src/internal/profile/encode.go b/src/internal/profile/encode.go
new file mode 100644
index 0000000..af31933
--- /dev/null
+++ b/src/internal/profile/encode.go
@@ -0,0 +1,482 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package profile
+
+import (
+	"errors"
+	"fmt"
+	"sort"
+)
+
+// decoder returns profileDecoder, the table of per-field decode functions
+// for Profile messages.
+func (*Profile) decoder() []decoder {
+	return profileDecoder
+}
+
+// preEncode populates the unexported fields to be used by encode
+// (with suffix X) from the corresponding exported fields. The
+// exported fields are cleared up to facilitate testing.
+func (p *Profile) preEncode() {
+	strings := make(map[string]int)
+	addString(strings, "")
+
+	for _, st := range p.SampleType {
+		st.typeX = addString(strings, st.Type)
+		st.unitX = addString(strings, st.Unit)
+	}
+
+	for _, s := range p.Sample {
+		s.labelX = nil
+		var keys []string
+		for k := range s.Label {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+		for _, k := range keys {
+			vs := s.Label[k]
+			for _, v := range vs {
+				s.labelX = append(s.labelX,
+					Label{
+						keyX: addString(strings, k),
+						strX: addString(strings, v),
+					},
+				)
+			}
+		}
+		var numKeys []string
+		for k := range s.NumLabel {
+			numKeys = append(numKeys, k)
+		}
+		sort.Strings(numKeys)
+		for _, k := range numKeys {
+			vs := s.NumLabel[k]
+			for _, v := range vs {
+				s.labelX = append(s.labelX,
+					Label{
+						keyX: addString(strings, k),
+						numX: v,
+					},
+				)
+			}
+		}
+		s.locationIDX = nil
+		for _, l := range s.Location {
+			s.locationIDX = append(s.locationIDX, l.ID)
+		}
+	}
+
+	for _, m := range p.Mapping {
+		m.fileX = addString(strings, m.File)
+		m.buildIDX = addString(strings, m.BuildID)
+	}
+
+	for _, l := range p.Location {
+		for i, ln := range l.Line {
+			if ln.Function != nil {
+				l.Line[i].functionIDX = ln.Function.ID
+			} else {
+				l.Line[i].functionIDX = 0
+			}
+		}
+		if l.Mapping != nil {
+			l.mappingIDX = l.Mapping.ID
+		} else {
+			l.mappingIDX = 0
+		}
+	}
+	for _, f := range p.Function {
+		f.nameX = addString(strings, f.Name)
+		f.systemNameX = addString(strings, f.SystemName)
+		f.filenameX = addString(strings, f.Filename)
+	}
+
+	p.dropFramesX = addString(strings, p.DropFrames)
+	p.keepFramesX = addString(strings, p.KeepFrames)
+
+	if pt := p.PeriodType; pt != nil {
+		pt.typeX = addString(strings, pt.Type)
+		pt.unitX = addString(strings, pt.Unit)
+	}
+
+	p.stringTable = make([]string, len(strings))
+	for s, i := range strings {
+		p.stringTable[i] = s
+	}
+}
+
+// encode writes the profile to b: the repeated messages under tags 1-5,
+// the string table under tag 6, and the scalar fields under tags 7-12. It
+// reads the X-suffixed fields and the string table that preEncode fills in.
+func (p *Profile) encode(b *buffer) {
+	for _, st := range p.SampleType {
+		encodeMessage(b, 1, st)
+	}
+	for _, smp := range p.Sample {
+		encodeMessage(b, 2, smp)
+	}
+	for _, mp := range p.Mapping {
+		encodeMessage(b, 3, mp)
+	}
+	for _, loc := range p.Location {
+		encodeMessage(b, 4, loc)
+	}
+	for _, fn := range p.Function {
+		encodeMessage(b, 5, fn)
+	}
+	encodeStrings(b, 6, p.stringTable)
+	encodeInt64Opt(b, 7, p.dropFramesX)
+	encodeInt64Opt(b, 8, p.keepFramesX)
+	encodeInt64Opt(b, 9, p.TimeNanos)
+	encodeInt64Opt(b, 10, p.DurationNanos)
+	// The period type is written only when at least one of its two string
+	// indices is non-zero.
+	if pt := p.PeriodType; pt != nil {
+		if pt.typeX != 0 || pt.unitX != 0 {
+			encodeMessage(b, 11, pt)
+		}
+	}
+	encodeInt64Opt(b, 12, p.Period)
+}
+
+// profileDecoder is the decode table for Profile messages. The function at
+// index i decodes field number i into the *Profile passed as m; index 0 is
+// unused. String-valued fields are stored as string-table indices in the
+// X-suffixed fields and resolved later by postDecode.
+var profileDecoder = []decoder{
+	nil, // 0
+	// repeated ValueType sample_type = 1
+	func(b *buffer, m message) error {
+		x := new(ValueType)
+		pp := m.(*Profile)
+		pp.SampleType = append(pp.SampleType, x)
+		return decodeMessage(b, x)
+	},
+	// repeated Sample sample = 2
+	func(b *buffer, m message) error {
+		x := new(Sample)
+		pp := m.(*Profile)
+		pp.Sample = append(pp.Sample, x)
+		return decodeMessage(b, x)
+	},
+	// repeated Mapping mapping = 3
+	func(b *buffer, m message) error {
+		x := new(Mapping)
+		pp := m.(*Profile)
+		pp.Mapping = append(pp.Mapping, x)
+		return decodeMessage(b, x)
+	},
+	// repeated Location location = 4
+	func(b *buffer, m message) error {
+		x := new(Location)
+		pp := m.(*Profile)
+		pp.Location = append(pp.Location, x)
+		return decodeMessage(b, x)
+	},
+	// repeated Function function = 5
+	func(b *buffer, m message) error {
+		x := new(Function)
+		pp := m.(*Profile)
+		pp.Function = append(pp.Function, x)
+		return decodeMessage(b, x)
+	},
+	// repeated string string_table = 6
+	func(b *buffer, m message) error {
+		err := decodeStrings(b, &m.(*Profile).stringTable)
+		if err != nil {
+			return err
+		}
+		// The first entry of the string table is required to be the
+		// empty string.
+		if m.(*Profile).stringTable[0] != "" {
+			return errors.New("string_table[0] must be ''")
+		}
+		return nil
+	},
+	// int64 drop_frames = 7 (string-table index)
+	func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).dropFramesX) },
+	// int64 keep_frames = 8 (string-table index)
+	func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).keepFramesX) },
+	// int64 time_nanos = 9
+	func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).TimeNanos) },
+	// int64 duration_nanos = 10
+	func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).DurationNanos) },
+	// ValueType period_type = 11
+	func(b *buffer, m message) error {
+		x := new(ValueType)
+		pp := m.(*Profile)
+		pp.PeriodType = x
+		return decodeMessage(b, x)
+	},
+	// int64 period = 12
+	func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).Period) },
+	// repeated int64 comment = 13 (string-table indices)
+	func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) },
+	// int64 default_sample_type = 14 (string-table index)
+	func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) },
+}
+
+// postDecode takes the unexported fields populated by decode (with
+// suffix X) and populates the corresponding exported fields.
+// The unexported fields are cleared up to facilitate testing.
+//
+// It returns an error when a Line refers to a function ID that is not in
+// p.Function, or when any getString call reports a failure while resolving
+// a string-table index. The error is threaded through every getString
+// call and returned at the end rather than being discarded.
+func (p *Profile) postDecode() error {
+	var err error
+
+	// Index mappings by ID so that locations can be linked to them.
+	mappings := make(map[uint64]*Mapping)
+	for _, m := range p.Mapping {
+		m.File, err = getString(p.stringTable, &m.fileX, err)
+		m.BuildID, err = getString(p.stringTable, &m.buildIDX, err)
+		mappings[m.ID] = m
+	}
+
+	// Index functions by ID so that lines can be linked to them.
+	functions := make(map[uint64]*Function)
+	for _, f := range p.Function {
+		f.Name, err = getString(p.stringTable, &f.nameX, err)
+		f.SystemName, err = getString(p.stringTable, &f.systemNameX, err)
+		f.Filename, err = getString(p.stringTable, &f.filenameX, err)
+		functions[f.ID] = f
+	}
+
+	locations := make(map[uint64]*Location)
+	for _, l := range p.Location {
+		l.Mapping = mappings[l.mappingIDX]
+		l.mappingIDX = 0
+		for i, ln := range l.Line {
+			// A zero function ID means the line has no function.
+			if id := ln.functionIDX; id != 0 {
+				l.Line[i].Function = functions[id]
+				if l.Line[i].Function == nil {
+					return fmt.Errorf("Function ID %d not found", id)
+				}
+				l.Line[i].functionIDX = 0
+			}
+		}
+		locations[l.ID] = l
+	}
+
+	for _, st := range p.SampleType {
+		st.Type, err = getString(p.stringTable, &st.typeX, err)
+		st.Unit, err = getString(p.stringTable, &st.unitX, err)
+	}
+
+	for _, s := range p.Sample {
+		labels := make(map[string][]string)
+		numLabels := make(map[string][]int64)
+		for _, l := range s.labelX {
+			var key, value string
+			key, err = getString(p.stringTable, &l.keyX, err)
+			// A non-zero strX marks a string label; otherwise the label
+			// is numeric and carries its value in numX.
+			if l.strX != 0 {
+				value, err = getString(p.stringTable, &l.strX, err)
+				labels[key] = append(labels[key], value)
+			} else {
+				numLabels[key] = append(numLabels[key], l.numX)
+			}
+		}
+		if len(labels) > 0 {
+			s.Label = labels
+		}
+		if len(numLabels) > 0 {
+			s.NumLabel = numLabels
+		}
+		s.Location = nil
+		for _, lid := range s.locationIDX {
+			s.Location = append(s.Location, locations[lid])
+		}
+		s.locationIDX = nil
+	}
+
+	p.DropFrames, err = getString(p.stringTable, &p.dropFramesX, err)
+	p.KeepFrames, err = getString(p.stringTable, &p.keepFramesX, err)
+
+	// Guarantee a non-nil PeriodType even when field 11 was absent.
+	if pt := p.PeriodType; pt == nil {
+		p.PeriodType = &ValueType{}
+	}
+
+	if pt := p.PeriodType; pt != nil {
+		pt.Type, err = getString(p.stringTable, &pt.typeX, err)
+		pt.Unit, err = getString(p.stringTable, &pt.unitX, err)
+	}
+	for _, i := range p.commentX {
+		var c string
+		c, err = getString(p.stringTable, &i, err)
+		p.Comments = append(p.Comments, c)
+	}
+
+	p.commentX = nil
+	p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err)
+	p.stringTable = nil
+	// Report the accumulated getString error instead of dropping it.
+	return err
+}
+
+// decoder returns valueTypeDecoder, the table of per-field decode
+// functions for ValueType messages.
+func (*ValueType) decoder() []decoder {
+	return valueTypeDecoder
+}
+
+// encode writes the type and unit string-table indices of p to b under
+// tags 1 and 2.
+func (p *ValueType) encode(b *buffer) {
+	typ, unit := p.typeX, p.unitX
+	encodeInt64Opt(b, 1, typ)
+	encodeInt64Opt(b, 2, unit)
+}
+
+// valueTypeDecoder is the decode table for ValueType messages. The function
+// at index i decodes field number i; index 0 is unused.
+var valueTypeDecoder = []decoder{
+	nil, // 0
+	// optional int64 type = 1
+	func(b *buffer, m message) error {
+		vt := m.(*ValueType)
+		return decodeInt64(b, &vt.typeX)
+	},
+	// optional int64 unit = 2
+	func(b *buffer, m message) error {
+		vt := m.(*ValueType)
+		return decodeInt64(b, &vt.unitX)
+	},
+}
+
+// decoder returns the table of per-field decode functions used for
+// Sample messages.
+func (p *Sample) decoder() []decoder {
+	return sampleDecoder
+}
+
+// encode writes the sample to b: the location ids under field 1, every
+// value under field 2 and every label as a nested message under field 3.
+func (p *Sample) encode(b *buffer) {
+	encodeUint64s(b, 1, p.locationIDX)
+	for i := range p.Value {
+		encodeInt64(b, 2, p.Value[i])
+	}
+	for i := range p.labelX {
+		encodeMessage(b, 3, p.labelX[i])
+	}
+}
+
+// sampleDecoder holds one decode function per field number of a Sample
+// message; slot 0 is unused.
+var sampleDecoder = []decoder{
+	0: nil,
+	// repeated uint64 location = 1
+	1: func(b *buffer, m message) error {
+		return decodeUint64s(b, &m.(*Sample).locationIDX)
+	},
+	// repeated int64 value = 2
+	2: func(b *buffer, m message) error {
+		return decodeInt64s(b, &m.(*Sample).Value)
+	},
+	// repeated Label label = 3
+	3: func(b *buffer, m message) error {
+		sample := m.(*Sample)
+		// Grow the slice first, then decode straight into the new
+		// last element.
+		sample.labelX = append(sample.labelX, Label{})
+		last := len(sample.labelX) - 1
+		return decodeMessage(b, &sample.labelX[last])
+	},
+}
+
+// decoder returns the table of per-field decode functions used for
+// Label messages.
+func (p Label) decoder() []decoder {
+	return labelDecoder
+}
+
+// encode writes the key, string and numeric indices/values of p to b
+// under the field numbers (1, 2, 3) that labelDecoder reads.
+func (p Label) encode(b *buffer) {
+	encodeInt64Opt(b, 1, p.keyX)
+	encodeInt64Opt(b, 2, p.strX)
+	encodeInt64Opt(b, 3, p.numX)
+}
+
+// labelDecoder holds one decode function per field number of a Label
+// message; slot 0 is unused. The message handed to each function is
+// expected to be a *Label.
+var labelDecoder = []decoder{
+	0: nil,
+	// optional int64 key = 1
+	1: func(b *buffer, m message) error {
+		lbl := m.(*Label)
+		return decodeInt64(b, &lbl.keyX)
+	},
+	// optional int64 str = 2
+	2: func(b *buffer, m message) error {
+		lbl := m.(*Label)
+		return decodeInt64(b, &lbl.strX)
+	},
+	// optional int64 num = 3
+	3: func(b *buffer, m message) error {
+		lbl := m.(*Label)
+		return decodeInt64(b, &lbl.numX)
+	},
+}
+
+// decoder returns the table of per-field decode functions used for
+// Mapping messages.
+func (p *Mapping) decoder() []decoder {
+	return mappingDecoder
+}
+
+// encode writes the mapping to b under the field numbers (1 through 10)
+// that mappingDecoder reads.
+func (p *Mapping) encode(b *buffer) {
+	// Numeric fields.
+	encodeUint64Opt(b, 1, p.ID)
+	encodeUint64Opt(b, 2, p.Start)
+	encodeUint64Opt(b, 3, p.Limit)
+	encodeUint64Opt(b, 4, p.Offset)
+	// String-table indices.
+	encodeInt64Opt(b, 5, p.fileX)
+	encodeInt64Opt(b, 6, p.buildIDX)
+	// Boolean flags.
+	encodeBoolOpt(b, 7, p.HasFunctions)
+	encodeBoolOpt(b, 8, p.HasFilenames)
+	encodeBoolOpt(b, 9, p.HasLineNumbers)
+	encodeBoolOpt(b, 10, p.HasInlineFrames)
+}
+
+// mappingDecoder holds one decode function per field number of a
+// Mapping message; slot 0 is unused.
+var mappingDecoder = []decoder{
+	0: nil,
+	// optional uint64 id = 1
+	1: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Mapping).ID)
+	},
+	// optional uint64 memory_offset = 2
+	2: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Mapping).Start)
+	},
+	// optional uint64 memory_limit = 3
+	3: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Mapping).Limit)
+	},
+	// optional uint64 file_offset = 4
+	4: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Mapping).Offset)
+	},
+	// optional int64 filename = 5
+	5: func(b *buffer, m message) error {
+		return decodeInt64(b, &m.(*Mapping).fileX)
+	},
+	// optional int64 build_id = 6
+	6: func(b *buffer, m message) error {
+		return decodeInt64(b, &m.(*Mapping).buildIDX)
+	},
+	// optional bool has_functions = 7
+	7: func(b *buffer, m message) error {
+		return decodeBool(b, &m.(*Mapping).HasFunctions)
+	},
+	// optional bool has_filenames = 8
+	8: func(b *buffer, m message) error {
+		return decodeBool(b, &m.(*Mapping).HasFilenames)
+	},
+	// optional bool has_line_numbers = 9
+	9: func(b *buffer, m message) error {
+		return decodeBool(b, &m.(*Mapping).HasLineNumbers)
+	},
+	// optional bool has_inline_frames = 10
+	10: func(b *buffer, m message) error {
+		return decodeBool(b, &m.(*Mapping).HasInlineFrames)
+	},
+}
+
+// decoder returns the table of per-field decode functions used for
+// Location messages.
+func (p *Location) decoder() []decoder {
+	return locationDecoder
+}
+
+// encode writes the location to b: id, mapping id and address under
+// fields 1-3, then every Line entry as a nested message under field 4.
+func (p *Location) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.ID)
+	encodeUint64Opt(b, 2, p.mappingIDX)
+	encodeUint64Opt(b, 3, p.Address)
+	for i := 0; i < len(p.Line); i++ {
+		// Pass a pointer to the slice element itself rather than a copy.
+		entry := &p.Line[i]
+		encodeMessage(b, 4, entry)
+	}
+}
+
+// locationDecoder holds one decode function per field number of a
+// Location message; slot 0 is unused.
+var locationDecoder = []decoder{
+	0: nil,
+	// optional uint64 id = 1;
+	1: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Location).ID)
+	},
+	// optional uint64 mapping_id = 2;
+	2: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Location).mappingIDX)
+	},
+	// optional uint64 address = 3;
+	3: func(b *buffer, m message) error {
+		return decodeUint64(b, &m.(*Location).Address)
+	},
+	// repeated Line line = 4
+	4: func(b *buffer, m message) error {
+		loc := m.(*Location)
+		// Grow the slice first, then decode straight into the new
+		// last element.
+		loc.Line = append(loc.Line, Line{})
+		last := len(loc.Line) - 1
+		return decodeMessage(b, &loc.Line[last])
+	},
+}
+
+// decoder returns the table of per-field decode functions used for
+// Line messages.
+func (p *Line) decoder() []decoder {
+	return lineDecoder
+}
+
+// encode writes the function id and line number of p to b under the
+// field numbers (1 and 2) that lineDecoder reads.
+func (p *Line) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.functionIDX)
+	encodeInt64Opt(b, 2, p.Line)
+}
+
+// lineDecoder holds one decode function per field number of a Line
+// message; slot 0 is unused.
+var lineDecoder = []decoder{
+	0: nil,
+	// optional uint64 function_id = 1
+	1: func(b *buffer, m message) error {
+		ln := m.(*Line)
+		return decodeUint64(b, &ln.functionIDX)
+	},
+	// optional int64 line = 2
+	2: func(b *buffer, m message) error {
+		ln := m.(*Line)
+		return decodeInt64(b, &ln.Line)
+	},
+}
+
+// decoder returns the table of per-field decode functions used for
+// Function messages.
+func (p *Function) decoder() []decoder {
+	return functionDecoder
+}
+
+// encode writes the function to b under the field numbers (1 through 5)
+// that functionDecoder reads.
+func (p *Function) encode(b *buffer) {
+	encodeUint64Opt(b, 1, p.ID)
+	encodeInt64Opt(b, 2, p.nameX)
+	encodeInt64Opt(b, 3, p.systemNameX)
+	encodeInt64Opt(b, 4, p.filenameX)
+	encodeInt64Opt(b, 5, p.StartLine)
+}
+
+// functionDecoder holds one decode function per field number of a
+// Function message; slot 0 is unused.
+var functionDecoder = []decoder{
+	0: nil,
+	// optional uint64 id = 1
+	1: func(b *buffer, m message) error {
+		fn := m.(*Function)
+		return decodeUint64(b, &fn.ID)
+	},
+	// optional int64 function_name = 2
+	2: func(b *buffer, m message) error {
+		fn := m.(*Function)
+		return decodeInt64(b, &fn.nameX)
+	},
+	// optional int64 function_system_name = 3
+	3: func(b *buffer, m message) error {
+		fn := m.(*Function)
+		return decodeInt64(b, &fn.systemNameX)
+	},
+	// optional int64 filename = 4 (decoded as a single value)
+	4: func(b *buffer, m message) error {
+		fn := m.(*Function)
+		return decodeInt64(b, &fn.filenameX)
+	},
+	// optional int64 start_line = 5
+	5: func(b *buffer, m message) error {
+		fn := m.(*Function)
+		return decodeInt64(b, &fn.StartLine)
+	},
+}
+
+// addString returns the index recorded for s in strings. A string that
+// is not yet present is registered under the next free index, which is
+// the size of the map before the insertion.
+func addString(strings map[string]int, s string) int64 {
+	if idx, found := strings[s]; found {
+		return int64(idx)
+	}
+	idx := len(strings)
+	strings[s] = idx
+	return int64(idx)
+}
+
+// getString resolves the string-table index stored in *strng against
+// strings. It is designed to be chained: a non-nil incoming err is
+// returned unchanged and nothing else happens. An index outside the
+// table yields errMalformed and leaves *strng untouched; on success
+// *strng is reset to 0 and the referenced string is returned.
+func getString(strings []string, strng *int64, err error) (string, error) {
+	if err != nil {
+		return "", err
+	}
+	idx := int(*strng)
+	if idx >= 0 && idx < len(strings) {
+		*strng = 0
+		return strings[idx], nil
+	}
+	return "", errMalformed
+}
diff --git a/src/internal/profile/filter.go b/src/internal/profile/filter.go
new file mode 100644
index 0000000..9cad866
--- /dev/null
+++ b/src/internal/profile/filter.go
@@ -0,0 +1,158 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implements methods to filter samples from profiles.
+
+package profile
+
+import "regexp"
+
+// FilterSamplesByName filters the samples in a profile and only keeps
+// samples where at least one frame matches focus but none match ignore.
+// A nil focus matches every location that is not ignored. Lines that
+// match hide are stripped from their locations; locations left without
+// lines are removed from the kept samples, and samples left without
+// locations are dropped.
+//
+// Each result reports whether the corresponding regexp (focus, ignore,
+// hide) matched at least one location.
+func (p *Profile) FilterSamplesByName(focus, ignore, hide *regexp.Regexp) (fm, im, hm bool) {
+	// verdict[id] is true for a focused location and false for an
+	// ignored one; locations that are neither have no entry.
+	verdict := make(map[uint64]bool)
+	// emptied records the locations whose lines were all hidden.
+	emptied := make(map[uint64]bool)
+	for _, loc := range p.Location {
+		switch {
+		case ignore != nil && loc.matchesName(ignore):
+			im = true
+			verdict[loc.ID] = false
+		case focus == nil || loc.matchesName(focus):
+			fm = true
+			verdict[loc.ID] = true
+		}
+		if hide == nil || !loc.matchesName(hide) {
+			continue
+		}
+		hm = true
+		loc.Line = loc.unmatchedLines(hide)
+		if len(loc.Line) == 0 {
+			emptied[loc.ID] = true
+		}
+	}
+
+	kept := make([]*Sample, 0, len(p.Sample))
+	for _, sample := range p.Sample {
+		if !focusedAndNotIgnored(sample.Location, verdict) {
+			continue
+		}
+		if len(emptied) > 0 {
+			var visible []*Location
+			for _, loc := range sample.Location {
+				if !emptied[loc.ID] {
+					visible = append(visible, loc)
+				}
+			}
+			if len(visible) == 0 {
+				// Every location was hidden: drop the sample.
+				continue
+			}
+			sample.Location = visible
+		}
+		kept = append(kept, sample)
+	}
+	p.Sample = kept
+
+	return fm, im, hm
+}
+
+// matchesName reports whether re matches the name or the filename of
+// the function attached to any line of the location. Lines without a
+// function are skipped.
+func (loc *Location) matchesName(re *regexp.Regexp) bool {
+	for i := range loc.Line {
+		fn := loc.Line[i].Function
+		if fn == nil {
+			continue
+		}
+		if re.MatchString(fn.Name) || re.MatchString(fn.Filename) {
+			return true
+		}
+	}
+	return false
+}
+
+// unmatchedLines returns, in order, the lines of the location whose
+// function name and filename both fail to match re. Lines without a
+// function are always kept.
+func (loc *Location) unmatchedLines(re *regexp.Regexp) []Line {
+	var kept []Line
+	for _, ln := range loc.Line {
+		fn := ln.Function
+		if fn != nil && (re.MatchString(fn.Name) || re.MatchString(fn.Filename)) {
+			continue
+		}
+		kept = append(kept, ln)
+	}
+	return kept
+}
+
+// focusedAndNotIgnored looks up a slice of ids against a map of
+// focused/ignored locations. The map only contains locations that are
+// explicitly focused or ignored. Returns whether there is at least
+// one focused location but no ignored locations.
+func focusedAndNotIgnored(locs []*Location, m map[uint64]bool) bool {
+	var f bool
+	for _, loc := range locs {
+		if focus, focusOrIgnore := m[loc.ID]; focusOrIgnore {
+			if focus {
+				// Found focused location. Must keep searching in case there
+				// is an ignored one as well.
+				f = true
+			} else {
+				// Found ignored location. Can return false right away.
+				return false
+			}
+		}
+	}
+	return f
+}
+
+// TagMatch selects tags for filtering. It is called once per tag value
+// with the tag key; string-valued tags are passed as (key, val, 0) and
+// numeric tags as (key, "", nval). It returns true when the tag matches.
+type TagMatch func(key, val string, nval int64) bool
+
+// FilterSamplesByTag keeps only the samples of the profile that match
+// focus and do not match ignore, as decided by focusedSample.
+//
+// fm and im report whether focus and ignore, respectively, matched at
+// least one sample.
+func (p *Profile) FilterSamplesByTag(focus, ignore TagMatch) (fm, im bool) {
+	kept := make([]*Sample, 0, len(p.Sample))
+	for _, s := range p.Sample {
+		focused, ignored := focusedSample(s, focus, ignore)
+		if focused {
+			fm = true
+		}
+		if ignored {
+			im = true
+		}
+		if !focused || ignored {
+			continue
+		}
+		kept = append(kept, s)
+	}
+	p.Sample = kept
+	return fm, im
+}
+
+// focusedSample checks the string and numeric labels of a sample
+// against the focus and ignore matchers. fm reports whether focus
+// matched any label (always true when focus is nil); im reports whether
+// ignore matched any label (always false when ignore is nil).
+func focusedSample(s *Sample, focus, ignore TagMatch) (fm, im bool) {
+	fm = focus == nil
+	// check applies both matchers to a single label value. focus is no
+	// longer consulted once it has matched.
+	check := func(key, str string, num int64) {
+		if ignore != nil && ignore(key, str, num) {
+			im = true
+		}
+		if !fm && focus(key, str, num) {
+			fm = true
+		}
+	}
+	for key, vals := range s.Label {
+		for _, val := range vals {
+			check(key, val, 0)
+		}
+	}
+	for key, vals := range s.NumLabel {
+		for _, val := range vals {
+			check(key, "", val)
+		}
+	}
+	return fm, im
+}
diff --git a/src/internal/profile/legacy_profile.go b/src/internal/profile/legacy_profile.go
new file mode 100644
index 0000000..d69f8de
--- /dev/null
+++ b/src/internal/profile/legacy_profile.go
@@ -0,0 +1,1266 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements parsers to convert legacy profiles into the
+// profile.proto format.
+
+package profile
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"math"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+var (
+	// countStartRE matches a complete count-profile header line of the
+	// form "<name> profile: total <n>\n" and captures the name.
+	// countRE matches a complete count sample line of the form
+	// "<count> @ 0x<hex> 0x<hex> ...\n", capturing the count and the
+	// space-separated address list.
+	countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
+	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)
+
+	// heapHeaderRE matches a heap profile header, capturing four
+	// numbers, a "heap..." format tag and optional digits after a "/".
+	// heapSampleRE matches a heap sample row, capturing four numbers and
+	// the address text that follows "@".
+	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
+	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
+
+	// contentionSampleRE matches a contention sample row: two numbers
+	// followed by "@" and the address text.
+	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
+
+	// hexNumberRE matches a "0x"-prefixed lowercase hexadecimal number.
+	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
+
+	// growthHeaderRE matches a heap profile header ending in "@ growthz".
+	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)
+
+	// fragmentationHeaderRE matches a heap profile header ending in
+	// "@ fragmentationz".
+	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)
+
+	// threadzStartRE matches a "--- threadz <n> ---" marker line.
+	// threadStartRE matches a "--- Thread <hex> (name: <name>/<n>) stack: ---"
+	// marker line, capturing the hex id, the name and the number.
+	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
+	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
+
+	// procMapsRE matches a memory-map line made of a hex address range,
+	// a permission string, a hex offset, a hex "major:minor" pair, a
+	// decimal number and an optional trailing non-space token.
+	// NOTE(review): this looks like the /proc/<pid>/maps layout -- confirm.
+	procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
+
+	// briefMapsRE matches a shorter memory-map line: a hex address range
+	// followed by ":", a non-space token, and optionally text ending in
+	// "@" followed by a hex number.
+	briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)
+
+	// LegacyHeapAllocated instructs the heapz parsers to use the
+	// allocated memory stats instead of the default in-use memory. Note
+	// that tcmalloc doesn't provide all allocated memory, only in-use
+	// stats.
+	LegacyHeapAllocated bool
+)
+
+// isSpaceOrComment reports whether line, once surrounding whitespace is
+// removed, is empty or begins with '#'.
+func isSpaceOrComment(line string) bool {
+	trimmed := strings.TrimSpace(line)
+	return trimmed == "" || strings.HasPrefix(trimmed, "#")
+}
+
+// parseGoCount parses a Go count profile (e.g., threadcreate or
+// goroutine) and returns a new Profile.
+//
+// After any blank or '#' comment lines, the input must begin with a
+// header matching countStartRE; otherwise errUnrecognized is returned.
+// Every following non-blank, non-comment line must match countRE, or
+// errMalformed is returned. Sample parsing stops at end of input or at
+// the first line starting with "---"; the last line read is then handed
+// to parseAdditionalSections together with the remaining input.
+func parseGoCount(b []byte) (*Profile, error) {
+	r := bytes.NewBuffer(b)
+
+	var line string
+	var err error
+	for {
+		// Skip past comments and empty lines seeking a real header.
+		line, err = r.ReadString('\n')
+		if err != nil {
+			return nil, err
+		}
+		if !isSpaceOrComment(line) {
+			break
+		}
+	}
+
+	m := countStartRE.FindStringSubmatch(line)
+	if m == nil {
+		return nil, errUnrecognized
+	}
+	// The captured name is used as the type of both the period and the
+	// single sample value.
+	profileType := m[1]
+	p := &Profile{
+		PeriodType: &ValueType{Type: profileType, Unit: "count"},
+		Period:     1,
+		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
+	}
+	// locations holds one Location per distinct (adjusted) address so
+	// that samples sharing an address share the Location.
+	locations := make(map[uint64]*Location)
+	for {
+		line, err = r.ReadString('\n')
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return nil, err
+		}
+		if isSpaceOrComment(line) {
+			continue
+		}
+		// A "---" line starts an additional section; stop reading samples.
+		if strings.HasPrefix(line, "---") {
+			break
+		}
+		m := countRE.FindStringSubmatch(line)
+		if m == nil {
+			return nil, errMalformed
+		}
+		n, err := strconv.ParseInt(m[1], 0, 64)
+		if err != nil {
+			return nil, errMalformed
+		}
+		fields := strings.Fields(m[2])
+		locs := make([]*Location, 0, len(fields))
+		for _, stk := range fields {
+			addr, err := strconv.ParseUint(stk, 0, 64)
+			if err != nil {
+				return nil, errMalformed
+			}
+			// Adjust all frames by -1 to land on the call instruction.
+			addr--
+			loc := locations[addr]
+			if loc == nil {
+				loc = &Location{
+					Address: addr,
+				}
+				locations[addr] = loc
+				p.Location = append(p.Location, loc)
+			}
+			locs = append(locs, loc)
+		}
+		p.Sample = append(p.Sample, &Sample{
+			Location: locs,
+			Value:    []int64{n},
+		})
+	}
+
+	// line still holds the last line read by the loop above.
+	if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
+		return nil, err
+	}
+	return p, nil
+}
+
+// remapLocationIDs rebuilds p.Location from the locations actually
+// referenced by the samples. Each distinct *Location is kept once, in
+// the order it is first encountered, and is given a fresh ID counting
+// up from 1.
+func (p *Profile) remapLocationIDs() {
+	var ordered []*Location
+	visited := make(map[*Location]bool, len(p.Location))
+
+	for _, sample := range p.Sample {
+		for _, loc := range sample.Location {
+			if !visited[loc] {
+				visited[loc] = true
+				ordered = append(ordered, loc)
+				// IDs are 1-based positions in the rebuilt list.
+				loc.ID = uint64(len(ordered))
+			}
+		}
+	}
+	p.Location = ordered
+}
+
+// remapFunctionIDs rebuilds p.Function from the functions referenced by
+// the lines of p.Location. Each distinct non-nil *Function is kept
+// once, in the order it is first encountered, and is given a fresh ID
+// counting up from 1.
+func (p *Profile) remapFunctionIDs() {
+	var ordered []*Function
+	visited := make(map[*Function]bool, len(p.Function))
+
+	for _, loc := range p.Location {
+		for i := range loc.Line {
+			fn := loc.Line[i].Function
+			if fn != nil && !visited[fn] {
+				visited[fn] = true
+				ordered = append(ordered, fn)
+				// IDs are 1-based positions in the rebuilt list.
+				fn.ID = uint64(len(ordered))
+			}
+		}
+	}
+	p.Function = ordered
+}
+
+// remapMappingIDs attaches to every location with a non-zero address
+// the first mapping whose [Start, Limit) range contains that address,
+// then renumbers the mappings from 1. It does nothing when the profile
+// has no mappings.
+//
+// The search is O(N*M); if it ever shows up as a bottleneck, sorting
+// the mappings and using a binary search would make it O(N*log(M)).
+func (p *Profile) remapMappingIDs() {
+	if len(p.Mapping) == 0 {
+		return
+	}
+
+	// Some profile handlers will incorrectly set regions for the main
+	// executable if its section is remapped. Fix them through heuristics.
+
+	// Heuristic 1: drop a leading '/anon_hugepage' mapping when the next
+	// mapping starts exactly where it ends.
+	first := p.Mapping[0]
+	if strings.HasPrefix(first.File, "/anon_hugepage") && len(p.Mapping) > 1 && first.Limit == p.Mapping[1].Start {
+		p.Mapping = p.Mapping[1:]
+	}
+
+	// Heuristic 2: if removing the file offset from the start of the
+	// main mapping lands on a recognizable start address, use it.
+	const expectedStart = 0x400000
+	if head := p.Mapping[0]; head.Start-head.Offset == expectedStart {
+		head.Start = expectedStart
+		head.Offset = 0
+	}
+
+	for _, loc := range p.Location {
+		addr := loc.Address
+		if addr == 0 {
+			continue
+		}
+		for _, m := range p.Mapping {
+			if addr >= m.Start && addr < m.Limit {
+				loc.Mapping = m
+				break
+			}
+		}
+	}
+
+	// Reset all mapping IDs to their 1-based position.
+	for i, m := range p.Mapping {
+		m.ID = uint64(i + 1)
+	}
+}
+
+// cpuInts lists the word readers that parseCPU tries, in order, when
+// looking for a recognizable header: 32-bit little-endian, 32-bit
+// big-endian, 64-bit little-endian and 64-bit big-endian. Each reader
+// returns the decoded word and the remaining bytes, or (0, nil) when
+// the input is too short.
+var cpuInts = []func([]byte) (uint64, []byte){
+	get32l,
+	get32b,
+	get64l,
+	get64b,
+}
+
+func get32l(b []byte) (uint64, []byte) {
+	if len(b) < 4 {
+		return 0, nil
+	}
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
+}
+
+func get32b(b []byte) (uint64, []byte) {
+	if len(b) < 4 {
+		return 0, nil
+	}
+	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
+}
+
+func get64l(b []byte) (uint64, []byte) {
+	if len(b) < 8 {
+		return 0, nil
+	}
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
+}
+
+func get64b(b []byte) (uint64, []byte) {
+	if len(b) < 8 {
+		return 0, nil
+	}
+	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
+}
+
+// ParseTracebacks parses a set of tracebacks and returns a newly
+// populated profile. It will accept any text file and generate a
+// Profile out of it with any hex addresses it can identify, including
+// a process map if it can recognize one. Each sample will include a
+// tag "source" with the addresses recognized in string format.
+//
+// Consecutive lines containing hex addresses are merged into a single
+// sample; a line without any address ends the current sample. Reading
+// stops at end of input or at a line that sectionTrigger classifies as
+// the start of a memory map, and the remaining input is passed to
+// ParseMemoryMap.
+func ParseTracebacks(b []byte) (*Profile, error) {
+	r := bytes.NewBuffer(b)
+
+	p := &Profile{
+		PeriodType: &ValueType{Type: "trace", Unit: "count"},
+		Period:     1,
+		SampleType: []*ValueType{
+			{Type: "trace", Unit: "count"},
+		},
+	}
+
+	// sources and sloc accumulate the address strings and locations of
+	// the sample currently being assembled.
+	var sources []string
+	var sloc []*Location
+
+	// locs holds one Location per distinct (adjusted) address.
+	locs := make(map[uint64]*Location)
+	for {
+		l, err := r.ReadString('\n')
+		if err != nil {
+			if err != io.EOF {
+				return nil, err
+			}
+			// At EOF a final line without a trailing newline is still
+			// processed; only an empty read ends the loop.
+			if l == "" {
+				break
+			}
+		}
+		if sectionTrigger(l) == memoryMapSection {
+			break
+		}
+		if s, addrs := extractHexAddresses(l); len(s) > 0 {
+			for _, addr := range addrs {
+				// Addresses from stack traces point to the next instruction after
+				// each call. Adjust by -1 to land somewhere on the actual call.
+				addr--
+				loc := locs[addr]
+				if locs[addr] == nil {
+					loc = &Location{
+						Address: addr,
+					}
+					p.Location = append(p.Location, loc)
+					locs[addr] = loc
+				}
+				sloc = append(sloc, loc)
+			}
+
+			sources = append(sources, s...)
+		} else {
+			// A line without addresses closes the sample in progress.
+			if len(sources) > 0 || len(sloc) > 0 {
+				addTracebackSample(sloc, sources, p)
+				sloc, sources = nil, nil
+			}
+		}
+	}
+
+	// Add final sample to save any leftover data.
+	if len(sources) > 0 || len(sloc) > 0 {
+		addTracebackSample(sloc, sources, p)
+	}
+
+	if err := p.ParseMemoryMap(r); err != nil {
+		return nil, err
+	}
+	return p, nil
+}
+
+// addTracebackSample appends to p a sample of value 1 that references
+// the locations l and carries the strings s under the "source" label.
+func addTracebackSample(l []*Location, s []string, p *Profile) {
+	sample := &Sample{
+		Value:    []int64{1},
+		Location: l,
+		Label:    map[string][]string{"source": s},
+	}
+	p.Sample = append(p.Sample, sample)
+}
+
+// parseCPU parses a profilez legacy profile and returns a newly
+// populated Profile.
+//
+// The general format for profilez samples is a sequence of words in
+// binary format. The first words are a header with the following data:
+//   1st word -- 0
+//   2nd word -- 3
+//   3rd word -- 0 if a c++ application, 1 if a java application.
+//   4th word -- Sampling period (in microseconds).
+//   5th word -- Padding.
+//
+// Every word reader in cpuInts is tried in turn. The first one that
+// yields five words with values 0, 3, 0, a non-zero period and 0 is
+// used to decode the rest of the data; if none does, errUnrecognized
+// is returned.
+func parseCPU(b []byte) (*Profile, error) {
+	for _, parse := range cpuInts {
+		var hdr [5]uint64
+		rest := b
+		for i := range hdr {
+			hdr[i], rest = parse(rest)
+		}
+		if rest == nil {
+			// Input too short for a header of this word size.
+			continue
+		}
+		if hdr[0] == 0 && hdr[1] == 3 && hdr[2] == 0 && hdr[3] > 0 && hdr[4] == 0 {
+			return cpuProfile(rest, int64(hdr[3]), parse)
+		}
+	}
+	return nil, errUnrecognized
+}
+
+// cpuProfile returns a new Profile from C++ profilez data.
+// b is the profile bytes after the header, period is the profiling
+// period, and parse is a function to parse 8-byte chunks from the
+// profile in its native endianness.
+func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
+	p := &Profile{
+		Period:     period * 1000,
+		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
+		SampleType: []*ValueType{
+			{Type: "samples", Unit: "count"},
+			{Type: "cpu", Unit: "nanoseconds"},
+		},
+	}
+	var err error
+	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
+		return nil, err
+	}
+
+	// If all samples have the same second-to-the-bottom frame, it
+	// strongly suggests that it is an uninteresting artifact of
+	// measurement -- a stack frame pushed by the signal handler. The
+	// bottom frame is always correct as it is picked up from the signal
+	// structure, not the stack. Check if this is the case and if so,
+	// remove.
+	if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
+		allSame := true
+		id1 := p.Sample[0].Location[1].Address
+		for _, s := range p.Sample {
+			if len(s.Location) < 2 || id1 != s.Location[1].Address {
+				allSame = false
+				break
+			}
+		}
+		if allSame {
+			for _, s := range p.Sample {
+				s.Location = append(s.Location[:1], s.Location[2:]...)
+			}
+		}
+	}
+
+	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
+		return nil, err
+	}
+	return p, nil
+}
+
+// parseCPUSamples parses a collection of profilez samples from a
+// profile.
+//
+// profilez samples are a repeated sequence of stack frames of the
+// form:
+//    1st word -- The number of times this stack was encountered.
+//    2nd word -- The size of the stack (StackSize).
+//    3rd word -- The first address on the stack.
+//    ...
+//    StackSize + 2 -- The last address on the stack
+// The last stack trace is of the form:
+//   1st word -- 0
+//   2nd word -- 1
+//   3rd word -- 0
+//
+// Addresses from stack traces may point to the next instruction after
+// each call. Optionally adjust by -1 to land somewhere on the actual
+// call (except for the leaf, which is not a call).
+//
+// Samples and newly seen locations are appended to p. The function
+// returns the bytes left after the samples and the address-to-Location
+// map it built, or errUnrecognized when a sample header cannot be read
+// or announces a stack larger than len(b)/4 words.
+func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
+	locs := make(map[uint64]*Location)
+	for len(b) > 0 {
+		var count, nstk uint64
+		count, b = parse(b)
+		nstk, b = parse(b)
+		// parse returns a nil slice when it runs out of input.
+		if b == nil || nstk > uint64(len(b)/4) {
+			return nil, nil, errUnrecognized
+		}
+		var sloc []*Location
+		addrs := make([]uint64, nstk)
+		for i := 0; i < int(nstk); i++ {
+			addrs[i], b = parse(b)
+		}
+
+		if count == 0 && nstk == 1 && addrs[0] == 0 {
+			// End of data marker
+			break
+		}
+		for i, addr := range addrs {
+			// The leaf (index 0) is never adjusted.
+			if adjust && i > 0 {
+				addr--
+			}
+			loc := locs[addr]
+			if loc == nil {
+				loc = &Location{
+					Address: addr,
+				}
+				locs[addr] = loc
+				p.Location = append(p.Location, loc)
+			}
+			sloc = append(sloc, loc)
+		}
+		p.Sample = append(p.Sample,
+			&Sample{
+				Value:    []int64{int64(count), int64(count) * p.Period},
+				Location: sloc,
+			})
+	}
+	// Reached either the end-of-data marker (break above) or the end of b.
+	return b, locs, nil
+}
+
+// parseHeap parses a heapz legacy or a growthz profile and
+// returns a newly populated Profile.
+//
+// The first line must match heapHeaderRE, growthHeaderRE or
+// fragmentationHeaderRE; otherwise errUnrecognized is returned. The
+// sample type names are prefixed with "alloc_" or "inuse_" depending on
+// LegacyHeapAllocated. Sample lines are read until end of input or
+// until sectionTrigger recognizes the start of another section, and
+// the last line read is then handed to parseAdditionalSections.
+func parseHeap(b []byte) (p *Profile, err error) {
+	r := bytes.NewBuffer(b)
+	l, err := r.ReadString('\n')
+	if err != nil {
+		return nil, errUnrecognized
+	}
+
+	// sampling is passed to parseHeapSample, which rescales sample
+	// values only when it is "v2".
+	sampling := ""
+
+	if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
+		p = &Profile{
+			SampleType: []*ValueType{
+				{Type: "objects", Unit: "count"},
+				{Type: "space", Unit: "bytes"},
+			},
+			PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
+		}
+
+		// header[6] holds the optional digits after the "/" in the header.
+		var period int64
+		if len(header[6]) > 0 {
+			if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
+				return nil, errUnrecognized
+			}
+		}
+
+		// header[5] is the "heap..." format tag; it selects the sampling
+		// mode and how the period is derived.
+		switch header[5] {
+		case "heapz_v2", "heap_v2":
+			sampling, p.Period = "v2", period
+		case "heapprofile":
+			sampling, p.Period = "", 1
+		case "heap":
+			sampling, p.Period = "v2", period/2
+		default:
+			return nil, errUnrecognized
+		}
+	} else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
+		p = &Profile{
+			SampleType: []*ValueType{
+				{Type: "objects", Unit: "count"},
+				{Type: "space", Unit: "bytes"},
+			},
+			PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
+			Period:     1,
+		}
+	} else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
+		p = &Profile{
+			SampleType: []*ValueType{
+				{Type: "objects", Unit: "count"},
+				{Type: "space", Unit: "bytes"},
+			},
+			PeriodType: &ValueType{Type: "allocations", Unit: "count"},
+			Period:     1,
+		}
+	} else {
+		return nil, errUnrecognized
+	}
+
+	if LegacyHeapAllocated {
+		for _, st := range p.SampleType {
+			st.Type = "alloc_" + st.Type
+		}
+	} else {
+		for _, st := range p.SampleType {
+			st.Type = "inuse_" + st.Type
+		}
+	}
+
+	// locs holds one Location per distinct (adjusted) address.
+	locs := make(map[uint64]*Location)
+	for {
+		l, err = r.ReadString('\n')
+		if err != nil {
+			if err != io.EOF {
+				return nil, err
+			}
+
+			// At EOF a final line without a trailing newline is still
+			// processed; only an empty read ends the loop.
+			if l == "" {
+				break
+			}
+		}
+
+		if isSpaceOrComment(l) {
+			continue
+		}
+		l = strings.TrimSpace(l)
+
+		// Any recognized section header ends the sample list.
+		if sectionTrigger(l) != unrecognizedSection {
+			break
+		}
+
+		value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
+		if err != nil {
+			return nil, err
+		}
+		var sloc []*Location
+		for _, addr := range addrs {
+			// Addresses from stack traces point to the next instruction after
+			// each call. Adjust by -1 to land somewhere on the actual call.
+			addr--
+			loc := locs[addr]
+			if locs[addr] == nil {
+				loc = &Location{
+					Address: addr,
+				}
+				p.Location = append(p.Location, loc)
+				locs[addr] = loc
+			}
+			sloc = append(sloc, loc)
+		}
+
+		p.Sample = append(p.Sample, &Sample{
+			Value:    value,
+			Location: sloc,
+			NumLabel: map[string][]int64{"bytes": {blocksize}},
+		})
+	}
+
+	// l still holds the last line read by the loop above.
+	if err = parseAdditionalSections(l, r, p); err != nil {
+		return nil, err
+	}
+	return p, nil
+}
+
+// parseHeapSample parses a single row from a heap profile.
+//
+// It returns the {count, bytes} pair of the row (the in-use pair by
+// default, the allocated pair when LegacyHeapAllocated is set), the
+// average block size bytes/count, and the addresses listed after "@".
+// When sampling is "v2" and the count is non-zero, the pair is rescaled
+// with scaleHeapSample using rate. An error is returned when the row
+// does not match heapSampleRE, when a number cannot be parsed, or when
+// the count is 0 but the byte total is not.
+func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
+	fields := heapSampleRE.FindStringSubmatch(line)
+	if n := len(fields); n != 6 {
+		return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", n)
+	}
+
+	// Use first two values by default; tcmalloc sampling generates the
+	// same value for both, only the older heap-profile collect separate
+	// stats for in-use and allocated objects.
+	first := 1
+	if LegacyHeapAllocated {
+		first = 3
+	}
+
+	var v1, v2 int64
+	if v1, err = strconv.ParseInt(fields[first], 10, 64); err != nil {
+		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
+	}
+	if v2, err = strconv.ParseInt(fields[first+1], 10, 64); err != nil {
+		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
+	}
+
+	switch {
+	case v1 != 0:
+		// Compute the block size before the values are rescaled.
+		blocksize = v2 / v1
+		if sampling == "v2" {
+			v1, v2 = scaleHeapSample(v1, v2, rate)
+		}
+	case v2 != 0:
+		return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
+	}
+
+	value = []int64{v1, v2}
+	addrs = parseHexAddresses(fields[5])
+
+	return value, blocksize, addrs, nil
+}
+
+// extractHexAddresses extracts the "0x"-prefixed hex numbers found in s
+// (as matched by hexNumberRE) and returns them as strings together
+// with their numeric values. The two slices are parallel: element i of
+// the second is the value of element i of the first.
+//
+// A token that matches the pattern but does not fit in a uint64 (more
+// than 16 significant hex digits) is skipped in both slices. Inputs
+// are arbitrary text, so such a token must not abort the program.
+func extractHexAddresses(s string) ([]string, []uint64) {
+	var hexStrings []string
+	var ids []uint64
+	for _, tok := range hexNumberRE.FindAllString(s, -1) {
+		id, err := strconv.ParseUint(tok, 0, 64)
+		if err != nil {
+			// The regexp guarantees valid hex syntax, so the only
+			// possible failure is a value out of range for uint64.
+			continue
+		}
+		hexStrings = append(hexStrings, tok)
+		ids = append(ids, id)
+	}
+	return hexStrings, ids
+}
+
+// parseHexAddresses returns the numeric values of the hex numbers that
+// extractHexAddresses finds in s, discarding their string forms.
+func parseHexAddresses(s string) []uint64 {
+	_, values := extractHexAddresses(s)
+	return values
+}
+
+// scaleHeapSample adjusts the data from a heapz Sample to
+// account for its probability of appearing in the collected
+// data. heapz profiles are a sampling of the memory allocations
+// requests in a program. We estimate the unsampled value by dividing
+// each collected sample by its probability of appearing in the
+// profile. heapz v2 profiles rely on a poisson process to determine
+// which samples to collect, based on the desired average collection
+// rate R. The probability of a sample of size S to appear in that
+// profile is 1-exp(-S/R).
+//
+// A zero count or size yields (0, 0). A rate of 1 or less returns the
+// inputs unchanged. Scaled results are truncated toward zero.
+func scaleHeapSample(count, size, rate int64) (int64, int64) {
+	switch {
+	case count == 0 || size == 0:
+		return 0, 0
+	case rate <= 1:
+		// if rate==1 all samples were collected so no adjustment is needed.
+		// if rate<1 treat as unknown and skip scaling.
+		return count, size
+	}
+
+	avgSize := float64(size) / float64(count)
+	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
+
+	return int64(float64(count) * scale), int64(float64(size) * scale)
+}
+
+// parseContention parses a mutex or contention profile. There are 2 cases:
+// "--- contentionz " for legacy C++ profiles (and backwards compatibility)
+// "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
+// This code converts the text output from runtime into a *Profile. (In the future
+// the runtime might write a serialized Profile directly making this unnecessary.)
+//
+// Leading blank and '#' comment lines are skipped. All three header
+// forms are handled by parseCppContention; any other first line yields
+// errUnrecognized. A read error hit before a header is found, including
+// io.EOF, is returned as is.
+func parseContention(b []byte) (*Profile, error) {
+	r := bytes.NewBuffer(b)
+	var l string
+	for {
+		// Skip past comments and empty lines seeking a real header.
+		var err error
+		l, err = r.ReadString('\n')
+		if err != nil {
+			return nil, err
+		}
+		if !isSpaceOrComment(l) {
+			break
+		}
+	}
+
+	switch {
+	case strings.HasPrefix(l, "--- contentionz "),
+		strings.HasPrefix(l, "--- mutex:"),
+		strings.HasPrefix(l, "--- contention:"):
+		return parseCppContention(r)
+	}
+	return nil, errUnrecognized
+}
+
+// parseCppContention parses the output from synchronization_profiling.cc
+// for backward compatibility, and the compatible (non-debug) block profile
+// output from the Go runtime.
+//
+// r must be positioned just after the header line. The input is read
+// in two phases: first "attribute = value" lines, then sample lines.
+// The line that ends the first phase is the first line examined by the
+// second. Sample parsing stops at end of input or at a line starting
+// with "---", and that last line is handed to parseAdditionalSections.
+func parseCppContention(r *bytes.Buffer) (*Profile, error) {
+	p := &Profile{
+		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
+		Period:     1,
+		SampleType: []*ValueType{
+			{Type: "contentions", Unit: "count"},
+			{Type: "delay", Unit: "nanoseconds"},
+		},
+	}
+
+	// cpuHz is taken from the "cycles/second" attribute and passed to
+	// parseContentionSample; it stays 0 when the attribute is absent.
+	var cpuHz int64
+	var l string
+	var err error
+	// Parse text of the form "attribute = value" before the samples.
+	const delimiter = "="
+	for {
+		l, err = r.ReadString('\n')
+		if err != nil {
+			if err != io.EOF {
+				return nil, err
+			}
+
+			// At EOF a final line without a trailing newline is still
+			// processed; only an empty read ends the loop.
+			if l == "" {
+				break
+			}
+		}
+		if isSpaceOrComment(l) {
+			continue
+		}
+
+		if l = strings.TrimSpace(l); l == "" {
+			continue
+		}
+
+		if strings.HasPrefix(l, "---") {
+			break
+		}
+
+		// A line without "=" is not an attribute: it is left in l for
+		// the sample loop below.
+		attr := strings.SplitN(l, delimiter, 2)
+		if len(attr) != 2 {
+			break
+		}
+		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
+		// This err shadows the outer one for the rest of the iteration.
+		var err error
+		switch key {
+		case "cycles/second":
+			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
+				return nil, errUnrecognized
+			}
+		case "sampling period":
+			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
+				return nil, errUnrecognized
+			}
+		case "ms since reset":
+			ms, err := strconv.ParseInt(val, 0, 64)
+			if err != nil {
+				return nil, errUnrecognized
+			}
+			// Convert milliseconds to nanoseconds.
+			p.DurationNanos = ms * 1000 * 1000
+		case "format":
+			// CPP contentionz profiles don't have format.
+			return nil, errUnrecognized
+		case "resolution":
+			// CPP contentionz profiles don't have resolution.
+			return nil, errUnrecognized
+		case "discarded samples":
+			// Accepted, but the value is not used.
+		default:
+			return nil, errUnrecognized
+		}
+	}
+
+	// locs holds one Location per distinct (adjusted) address.
+	locs := make(map[uint64]*Location)
+	for {
+		// On the first iteration l is the line that ended the attribute
+		// loop; afterwards it is the line read at the bottom of this loop.
+		if !isSpaceOrComment(l) {
+			if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
+				break
+			}
+			value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
+			if err != nil {
+				return nil, err
+			}
+			var sloc []*Location
+			for _, addr := range addrs {
+				// Addresses from stack traces point to the next instruction after
+				// each call. Adjust by -1 to land somewhere on the actual call.
+				addr--
+				loc := locs[addr]
+				if locs[addr] == nil {
+					loc = &Location{
+						Address: addr,
+					}
+					p.Location = append(p.Location, loc)
+					locs[addr] = loc
+				}
+				sloc = append(sloc, loc)
+			}
+			p.Sample = append(p.Sample, &Sample{
+				Value:    value,
+				Location: sloc,
+			})
+		}
+
+		if l, err = r.ReadString('\n'); err != nil {
+			if err != io.EOF {
+				return nil, err
+			}
+			if l == "" {
+				break
+			}
+		}
+	}
+
+	// l still holds the last line read by the loop above.
+	if err = parseAdditionalSections(l, r, p); err != nil {
+		return nil, err
+	}
+
+	return p, nil
+}
+
+// parseContentionSample parses a single row from a contention profile
+// into a new Sample.
+func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
+	sampleData := contentionSampleRE.FindStringSubmatch(line)
+	if sampleData == nil {
+		return value, addrs, errUnrecognized
+	}
+
+	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
+	if err != nil {
+		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
+	}
+	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
+	if err != nil {
+		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
+	}
+
+	// Unsample values if period and cpuHz are available.
+	// - Delays are scaled to cycles and then to nanoseconds.
+	// - Contentions are scaled to cycles.
+	if period > 0 {
+		if cpuHz > 0 {
+			cpuGHz := float64(cpuHz) / 1e9
+			v1 = int64(float64(v1) * float64(period) / cpuGHz)
+		}
+		v2 = v2 * period
+	}
+
+	value = []int64{v2, v1}
+	addrs = parseHexAddresses(sampleData[3])
+
+	return value, addrs, nil
+}
+
+// parseThread parses a Threadz profile and returns a new Profile with one "thread" count per sample.
+func parseThread(b []byte) (*Profile, error) {
+	r := bytes.NewBuffer(b)
+
+	var line string
+	var err error
+	for {
+		// Skip past comments and empty lines seeking a real header.
+		line, err = r.ReadString('\n')
+		if err != nil {
+			return nil, err
+		}
+		if !isSpaceOrComment(line) {
+			break
+		}
+	}
+
+	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
+		// Advance over initial comments until first stack trace.
+		for {
+			line, err = r.ReadString('\n')
+			if err != nil {
+				if err != io.EOF {
+					return nil, err
+				}
+
+				if line == "" {
+					break
+				}
+			}
+			if sectionTrigger(line) != unrecognizedSection || line[0] == '-' { // line is non-empty here
+				break
+			}
+		}
+	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
+		return nil, errUnrecognized
+	}
+
+	p := &Profile{
+		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
+		PeriodType: &ValueType{Type: "thread", Unit: "count"},
+		Period:     1,
+	}
+
+	locs := make(map[uint64]*Location) // one Location per distinct adjusted address
+	// Recognize each thread and populate profile samples.
+	for sectionTrigger(line) == unrecognizedSection {
+		if strings.HasPrefix(line, "---- no stack trace for") {
+			line = "" // stop reading threads; later sections are scanned from the next line
+			break
+		}
+		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
+			return nil, errUnrecognized
+		}
+
+		var addrs []uint64
+		line, addrs, err = parseThreadSample(r)
+		if err != nil {
+			return nil, errUnrecognized // the underlying read error is not reported
+		}
+		if len(addrs) == 0 {
+			// We got a --same as previous threads--. Bump counters.
+			if len(p.Sample) > 0 {
+				s := p.Sample[len(p.Sample)-1]
+				s.Value[0]++
+			}
+			continue
+		}
+
+		var sloc []*Location
+		for _, addr := range addrs {
+			// Addresses from stack traces point to the next instruction after
+			// each call. Adjust by -1 to land somewhere on the actual call.
+			addr--
+			loc := locs[addr]
+			if locs[addr] == nil {
+				loc = &Location{
+					Address: addr,
+				}
+				p.Location = append(p.Location, loc)
+				locs[addr] = loc
+			}
+			sloc = append(sloc, loc)
+		}
+
+		p.Sample = append(p.Sample, &Sample{
+			Value:    []int64{1}, // one thread with this stack
+			Location: sloc,
+		})
+	}
+
+	if err = parseAdditionalSections(line, r, p); err != nil {
+		return nil, err
+	}
+
+	return p, nil
+}
+
+// parseThreadSample parses a symbolized or unsymbolized stack trace.
+// It returns the trimmed line that ended the traceback (empty at end of
+// input), the addresses (nil after a 'same-as-previous' marker) and an error.
+func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
+	var l string
+	sameAsPrevious := false
+	for {
+		if l, err = b.ReadString('\n'); err != nil {
+			if err != io.EOF {
+				return "", nil, err
+			}
+			if l == "" {
+				break
+			}
+		}
+		if l = strings.TrimSpace(l); l == "" {
+			continue
+		}
+
+		if strings.HasPrefix(l, "---") { // start of the next thread or section
+			break
+		}
+		if strings.Contains(l, "same as previous thread") {
+			sameAsPrevious = true
+			continue
+		}
+
+		addrs = append(addrs, parseHexAddresses(l)...)
+	}
+
+	if sameAsPrevious {
+		return l, nil, nil // addresses collected so far are discarded
+	}
+	return l, addrs, nil
+}
+
+// parseAdditionalSections skips unrecognized sections, starting at l,
+// until a memory map section begins, then parses the memory map from b.
+func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
+	for {
+		if sectionTrigger(l) == memoryMapSection {
+			break
+		}
+		// Assign to the outer l (no := here) so the check above sees each new line.
+		if l, err = b.ReadString('\n'); err != nil {
+			if err != io.EOF {
+				return err
+			}
+			if l == "" {
+				break
+			}
+		}
+	}
+	return p.ParseMemoryMap(b)
+}
+
+// ParseMemoryMap parses a memory map in the format of
+// /proc/self/maps, and adds the executable mappings to the profile.
+// It renumbers the samples and locations in the profile correspondingly.
+func (p *Profile) ParseMemoryMap(rd io.Reader) error {
+	b := bufio.NewReader(rd)
+
+	var attrs []string
+	var r *strings.Replacer // rebuilt each time an attr=value line is seen
+	const delimiter = "="
+	for {
+		l, err := b.ReadString('\n')
+		if err != nil {
+			if err != io.EOF {
+				return err
+			}
+			if l == "" {
+				break
+			}
+		}
+		if l = strings.TrimSpace(l); l == "" {
+			continue
+		}
+
+		if r != nil {
+			l = r.Replace(l)
+		}
+		m, err := parseMappingEntry(l)
+		if err != nil {
+			if err == errUnrecognized {
+				// Recognize assignments of the form: attr=value, and replace
+				// $attr with value on subsequent mappings.
+				if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
+					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
+					r = strings.NewReplacer(attrs...)
+				}
+				// Ignore any unrecognized entries
+				continue
+			}
+			return err
+		}
+		if m == nil || (m.File == "" && len(p.Mapping) != 0) {
+			// In some cases the first entry may include the address range
+			// but not the name of the file. It should be followed by
+			// another entry with the name.
+			continue
+		}
+		if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
+			// Update the name if this is the entry following that empty one.
+			p.Mapping[0].File = m.File
+			continue
+		}
+		p.Mapping = append(p.Mapping, m)
+	}
+	p.remapLocationIDs()
+	p.remapFunctionIDs()
+	p.remapMappingIDs()
+	return nil
+}
+
+// parseMappingEntry parses one memory-map line, trying procMapsRE first
+// and briefMapsRE second. It returns (nil, nil) for a procMapsRE entry
+// that is not executable, and errUnrecognized when neither expression
+// matches or a numeric field is not valid hexadecimal.
+func parseMappingEntry(l string) (*Mapping, error) {
+	// build converts the hexadecimal start, limit and optional offset
+	// fields; an empty offset leaves Mapping.Offset at zero.
+	build := func(start, limit, offset, file string) (*Mapping, error) {
+		m := &Mapping{File: file}
+		var err error
+		if m.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
+			return nil, errUnrecognized
+		}
+		if m.Limit, err = strconv.ParseUint(limit, 16, 64); err != nil {
+			return nil, errUnrecognized
+		}
+		if offset == "" {
+			return m, nil
+		}
+		if m.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
+			return nil, errUnrecognized
+		}
+		return m, nil
+	}
+
+	// Full entry: start, limit, permissions, offset, ..., file.
+	if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
+		if !strings.Contains(me[3], "x") {
+			// Skip non-executable entries.
+			return nil, nil
+		}
+		return build(me[1], me[2], me[4], me[8])
+	}
+
+	// Brief entry: start, limit, file and an optional offset.
+	if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
+		return build(me[1], me[2], me[5], me[3])
+	}
+
+	return nil, errUnrecognized
+}
+
+type sectionType int // kind of section that a line of a legacy profile introduces
+
+const (
+	unrecognizedSection sectionType = iota // line starts no known section
+	memoryMapSection                       // line contains one of memoryMapTriggers
+)
+
+var memoryMapTriggers = []string{ // substrings that sectionTrigger treats as the start of a memory map
+	"--- Memory map: ---",
+	"MAPPED_LIBRARIES:",
+}
+
+func sectionTrigger(line string) sectionType { // classifies line by the section it introduces
+	for _, trigger := range memoryMapTriggers {
+		if strings.Contains(line, trigger) { // substring match, anywhere in the line
+			return memoryMapSection
+		}
+	}
+	return unrecognizedSection // no trigger found
+}
+
+// addLegacyFrameInfo sets p.DropFrames and p.KeepFrames according to the
+// profile's sample types: heap, contention or, by default, CPU.
+func (p *Profile) addLegacyFrameInfo() {
+	p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
+	if isProfileType(p, contentionzSampleTypes) {
+		p.DropFrames = lockRxStr
+	}
+	if isProfileType(p, heapzSampleTypes) || isProfileType(p, heapzInUseSampleTypes) ||
+		isProfileType(p, heapzAllocSampleTypes) {
+		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
+	}
+}
+
+var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles
+var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"}
+var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"}
+var contentionzSampleTypes = []string{"contentions", "delay"} // the sample types set up by parseCppContention
+
+// isProfileType reports whether the Type names of p's sample types are
+// exactly the strings in t, in the same order.
+func isProfileType(p *Profile, t []string) bool {
+	if len(p.SampleType) != len(t) {
+		return false
+	}
+	for i, want := range t {
+		if p.SampleType[i].Type != want {
+			return false
+		}
+	}
+	return true
+}
+
+var allocRxStr = strings.Join([]string{
+	// POSIX entry points.
+	`calloc`,
+	`cfree`,
+	`malloc`,
+	`free`,
+	`memalign`,
+	`do_memalign`,
+	`(__)?posix_memalign`,
+	`pvalloc`,
+	`valloc`,
+	`realloc`,
+
+	// TC malloc.
+	`tcmalloc::.*`,
+	`tc_calloc`,
+	`tc_cfree`,
+	`tc_malloc`,
+	`tc_free`,
+	`tc_memalign`,
+	`tc_posix_memalign`,
+	`tc_pvalloc`,
+	`tc_valloc`,
+	`tc_realloc`,
+	`tc_new`,
+	`tc_delete`,
+	`tc_newarray`,
+	`tc_deletearray`,
+	`tc_new_nothrow`,
+	`tc_newarray_nothrow`,
+
+	// Memory-allocation routines on OS X.
+	`malloc_zone_malloc`,
+	`malloc_zone_calloc`,
+	`malloc_zone_valloc`,
+	`malloc_zone_realloc`,
+	`malloc_zone_memalign`,
+	`malloc_zone_free`,
+
+	// Go runtime
+	`runtime\..*`,
+
+	// Other misc. memory allocation routines
+	`BaseArena::.*`,
+	`(::)?do_malloc_no_errno`,
+	`(::)?do_malloc_pages`,
+	`(::)?do_malloc`,
+	`DoSampledAllocation`,
+	`MallocedMemBlock::MallocedMemBlock`,
+	`_M_allocate`,
+	`__builtin_(vec_)?delete`,
+	`__builtin_(vec_)?new`,
+	`__gnu_cxx::new_allocator::allocate`,
+	`__libc_malloc`,
+	`__malloc_alloc_template::allocate`,
+	`allocate`,
+	`cpp_alloc`,
+	`operator new(\[\])?`,
+	`simple_alloc::allocate`,
+}, `|`) // alternation assigned to DropFrames for heap profiles by addLegacyFrameInfo
+
+var allocSkipRxStr = strings.Join([]string{
+	// Preserve Go runtime frames that appear in the middle/bottom of
+	// the stack.
+	`runtime\.panic`,
+	`runtime\.reflectcall`,
+	`runtime\.call[0-9]*`,
+}, `|`) // alternation assigned to KeepFrames for heap profiles by addLegacyFrameInfo
+
+var cpuProfilerRxStr = strings.Join([]string{
+	`ProfileData::Add`,
+	`ProfileData::prof_handler`,
+	`CpuProfiler::prof_handler`,
+	`__pthread_sighandler`,
+	`__restore`,
+}, `|`) // alternation used as DropFrames when addLegacyFrameInfo finds no heap or contention profile
+
+var lockRxStr = strings.Join([]string{
+	`RecordLockProfileData`, // also covered by the next, more general pattern
+	`(base::)?RecordLockProfileData.*`,
+	`(base::)?SubmitMutexProfileData.*`,
+	`(base::)?SubmitSpinLockProfileData.*`,
+	`(Mutex::)?AwaitCommon.*`,
+	`(Mutex::)?Unlock.*`,
+	`(Mutex::)?UnlockSlow.*`,
+	`(Mutex::)?ReaderUnlock.*`,
+	`(MutexLock::)?~MutexLock.*`,
+	`(SpinLock::)?Unlock.*`,
+	`(SpinLock::)?SlowUnlock.*`,
+	`(SpinLockHolder::)?~SpinLockHolder.*`,
+}, `|`) // alternation assigned to DropFrames for contention profiles by addLegacyFrameInfo
diff --git a/src/internal/profile/merge.go b/src/internal/profile/merge.go
new file mode 100644
index 0000000..3ea7d4c
--- /dev/null
+++ b/src/internal/profile/merge.go
@@ -0,0 +1,461 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package profile
+
+import (
+	"fmt"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// Merge merges all the profiles in srcs into a single Profile.
+// Returns a new profile independent of the input profiles. The merged
+// profile is compacted to eliminate unused samples, locations,
+// functions and mappings. Profiles must have identical profile sample
+// and period types or the merge will fail. profile.Period of the
+// resulting profile will be the maximum of all profiles, and
+// profile.TimeNanos will be the earliest nonzero one.
+func Merge(srcs []*Profile) (*Profile, error) {
+	if len(srcs) == 0 {
+		return nil, fmt.Errorf("no profiles to merge")
+	}
+	p, err := combineHeaders(srcs)
+	if err != nil {
+		return nil, err
+	}
+
+	pm := &profileMerger{
+		p:         p,
+		samples:   make(map[sampleKey]*Sample, len(srcs[0].Sample)),
+		locations: make(map[locationKey]*Location, len(srcs[0].Location)),
+		functions: make(map[functionKey]*Function, len(srcs[0].Function)),
+		mappings:  make(map[mappingKey]*Mapping, len(srcs[0].Mapping)),
+	}
+
+	for _, src := range srcs {
+		// Clear the profile-specific hash tables
+		pm.locationsByID = make(map[uint64]*Location, len(src.Location))
+		pm.functionsByID = make(map[uint64]*Function, len(src.Function))
+		pm.mappingsByID = make(map[uint64]mapInfo, len(src.Mapping))
+
+		if len(pm.mappings) == 0 && len(src.Mapping) > 0 {
+			// The Mapping list has the property that the first mapping
+			// represents the main binary. Take the first Mapping we see,
+			// otherwise the operations below will add mappings in an
+			// arbitrary order.
+			pm.mapMapping(src.Mapping[0])
+		}
+
+		for _, s := range src.Sample {
+			if !isZeroSample(s) { // all-zero samples are dropped from the result
+				pm.mapSample(s)
+			}
+		}
+	}
+
+	for _, s := range p.Sample {
+		if isZeroSample(s) {
+			// If there are any zero samples, re-merge the profile to GC
+			// them.
+			return Merge([]*Profile{p})
+		}
+	}
+
+	return p, nil
+}
+
+// Normalize normalizes the source profile p by multiplying each value in p by the
+// ratio of the sum of the base profile pb's values of that sample type to the sum of
+// p's own values of that sample type. It fails if p and pb are not compatible.
+func (p *Profile) Normalize(pb *Profile) error {
+	// The per-type sums below are indexed by position in SampleType.
+	if err := p.compatible(pb); err != nil {
+		return err
+	}
+
+	baseVals := make([]int64, len(p.SampleType))
+	for _, s := range pb.Sample {
+		for i, v := range s.Value {
+			baseVals[i] += v
+		}
+	}
+
+	srcVals := make([]int64, len(p.SampleType))
+	for _, s := range p.Sample {
+		for i, v := range s.Value {
+			srcVals[i] += v
+		}
+	}
+
+	normScale := make([]float64, len(baseVals))
+	for i := range baseVals {
+		if srcVals[i] == 0 {
+			normScale[i] = 0.0 // avoid dividing by zero; such values scale to zero
+		} else {
+			normScale[i] = float64(baseVals[i]) / float64(srcVals[i])
+		}
+	}
+	p.ScaleN(normScale)
+	return nil
+}
+
+// isZeroSample reports whether every value of s is zero; a sample with
+// no values counts as zero.
+func isZeroSample(s *Sample) bool {
+	nonZero := false
+	for i := 0; i < len(s.Value) && !nonZero; i++ {
+		nonZero = s.Value[i] != 0
+	}
+	return !nonZero
+}
+
+type profileMerger struct {
+	p *Profile // the merged profile being built
+
+	// Memoization tables within a profile.
+	locationsByID map[uint64]*Location // recreated by Merge for every source profile
+	functionsByID map[uint64]*Function
+	mappingsByID  map[uint64]mapInfo
+
+	// Memoization tables for profile entities.
+	samples   map[sampleKey]*Sample // shared across all source profiles
+	locations map[locationKey]*Location
+	functions map[functionKey]*Function
+	mappings  map[mappingKey]*Mapping
+}
+
+type mapInfo struct {
+	m      *Mapping // mapping in the merged profile
+	offset int64    // added to a source location's address by mapLocation
+}
+
+// mapSample adds src to the merged profile. Its locations are remapped
+// and its labels deep-copied; if an equivalent sample (same key) was
+// merged before, src's values are added to that sample, which is
+// returned instead of a new one.
+func (pm *profileMerger) mapSample(src *Sample) *Sample {
+	s := &Sample{
+		Location: make([]*Location, len(src.Location)),
+		Value:    make([]int64, len(src.Value)),
+		Label:    make(map[string][]string, len(src.Label)),
+		NumLabel: make(map[string][]int64, len(src.NumLabel)),
+		NumUnit:  make(map[string][]string, len(src.NumLabel)),
+	}
+	for i := range src.Location {
+		s.Location[i] = pm.mapLocation(src.Location[i])
+	}
+	for name, vals := range src.Label {
+		s.Label[name] = append(make([]string, 0, len(vals)), vals...)
+	}
+	for name, nums := range src.NumLabel {
+		units := src.NumUnit[name]
+		s.NumLabel[name] = append(make([]int64, 0, len(nums)), nums...)
+		s.NumUnit[name] = append(make([]string, 0, len(units)), units...)
+	}
+
+	// Look the sample up by the key of the remapped copy, so that the
+	// key reflects the merged profile's location IDs.
+	k := s.key()
+	if merged, ok := pm.samples[k]; ok {
+		for i := range src.Value {
+			merged.Value[i] += src.Value[i]
+		}
+		return merged
+	}
+
+	// First occurrence: record it with src's values.
+	copy(s.Value, src.Value)
+	pm.samples[k] = s
+	pm.p.Sample = append(pm.p.Sample, s)
+	return s
+}
+
+// key generates the sampleKey (location IDs plus labels) used to find equivalent samples in maps.
+func (sample *Sample) key() sampleKey {
+	ids := make([]string, len(sample.Location))
+	for i, l := range sample.Location {
+		ids[i] = strconv.FormatUint(l.ID, 16)
+	}
+
+	labels := make([]string, 0, len(sample.Label))
+	for k, v := range sample.Label {
+		labels = append(labels, fmt.Sprintf("%q%q", k, v))
+	}
+	sort.Strings(labels) // map iteration order is unspecified; sort for a stable key
+
+	numlabels := make([]string, 0, len(sample.NumLabel))
+	for k, v := range sample.NumLabel {
+		numlabels = append(numlabels, fmt.Sprintf("%q%x%x", k, v, sample.NumUnit[k]))
+	}
+	sort.Strings(numlabels) // same reason as for labels
+
+	return sampleKey{
+		strings.Join(ids, "|"),
+		strings.Join(labels, ""),
+		strings.Join(numlabels, ""),
+	}
+}
+
+type sampleKey struct {
+	locations string // hexadecimal location IDs joined with "|"
+	labels    string // sorted, quoted string labels, concatenated
+	numlabels string // sorted numeric labels with their units, concatenated
+}
+
+// mapLocation returns the merged profile's equivalent of src, creating it if needed.
+func (pm *profileMerger) mapLocation(src *Location) *Location {
+	if src == nil {
+		return nil
+	}
+	// Already remapped for the current source profile: nothing to update.
+	if l, ok := pm.locationsByID[src.ID]; ok {
+		return l
+	}
+
+	mi := pm.mapMapping(src.Mapping)
+	l := &Location{
+		ID:       uint64(len(pm.p.Location) + 1),
+		Mapping:  mi.m,
+		Address:  uint64(int64(src.Address) + mi.offset),
+		Line:     make([]Line, len(src.Line)),
+		IsFolded: src.IsFolded,
+	}
+	for i, ln := range src.Line {
+		l.Line[i] = pm.mapLine(ln)
+	}
+	// Check memoization table. Must be done on the remapped location to
+	// account for the remapped mapping ID.
+	k := l.key()
+	if ll, ok := pm.locations[k]; ok {
+		pm.locationsByID[src.ID] = ll
+		return ll
+	}
+	pm.locationsByID[src.ID] = l
+	pm.locations[k] = l
+	pm.p.Location = append(pm.p.Location, l)
+	return l
+}
+
+// key generates the locationKey used to find equivalent locations in maps.
+func (l *Location) key() locationKey {
+	key := locationKey{
+		addr:     l.Address,
+		isFolded: l.IsFolded,
+	}
+	if l.Mapping != nil {
+		// Normalizes address to handle address space randomization.
+		key.addr -= l.Mapping.Start
+		key.mappingID = l.Mapping.ID
+	}
+	lines := make([]string, len(l.Line)*2) // one (function ID, line number) pair per Line
+	for i, line := range l.Line {
+		if line.Function != nil {
+			lines[i*2] = strconv.FormatUint(line.Function.ID, 16)
+		}
+		lines[i*2+1] = strconv.FormatInt(line.Line, 16)
+	}
+	key.lines = strings.Join(lines, "|")
+	return key
+}
+
+type locationKey struct {
+	addr, mappingID uint64 // addr is relative to the mapping start when a mapping is set
+	lines           string
+	isFolded        bool
+}
+
+func (pm *profileMerger) mapMapping(src *Mapping) mapInfo { // returns the merged mapping for src
+	if src == nil {
+		return mapInfo{}
+	}
+
+	if mi, ok := pm.mappingsByID[src.ID]; ok {
+		return mi
+	}
+
+	// Check memoization tables.
+	mk := src.key()
+	if m, ok := pm.mappings[mk]; ok {
+		mi := mapInfo{m, int64(m.Start) - int64(src.Start)} // offset rebases src addresses onto m
+		pm.mappingsByID[src.ID] = mi
+		return mi
+	}
+	m := &Mapping{
+		ID:              uint64(len(pm.p.Mapping) + 1),
+		Start:           src.Start,
+		Limit:           src.Limit,
+		Offset:          src.Offset,
+		File:            src.File,
+		BuildID:         src.BuildID,
+		HasFunctions:    src.HasFunctions,
+		HasFilenames:    src.HasFilenames,
+		HasLineNumbers:  src.HasLineNumbers,
+		HasInlineFrames: src.HasInlineFrames,
+	}
+	pm.p.Mapping = append(pm.p.Mapping, m)
+
+	// Update memoization tables.
+	pm.mappings[mk] = m
+	mi := mapInfo{m, 0} // a fresh copy keeps src's addresses, so no offset
+	pm.mappingsByID[src.ID] = mi
+	return mi
+}
+
+// key generates the mappingKey (rounded size, offset and build ID or
+// file name) used to find equivalent mappings in maps.
+func (m *Mapping) key() mappingKey {
+	// Normalize addresses to handle address space randomization.
+	// Round up to next 4K boundary to avoid minor discrepancies.
+	const mapsizeRounding = 0x1000
+
+	size := m.Limit - m.Start
+	size = size + mapsizeRounding - 1
+	size = size - (size % mapsizeRounding)
+	key := mappingKey{
+		size:   size,
+		offset: m.Offset,
+	}
+
+	switch {
+	case m.BuildID != "":
+		key.buildIDOrFile = m.BuildID
+	case m.File != "":
+		key.buildIDOrFile = m.File
+	default:
+		// A mapping containing neither build ID nor file name is a fake mapping. A
+		// key with empty buildIDOrFile is used for fake mappings so that they are
+		// treated as the same mapping during merging.
+	}
+	return key
+}
+
+type mappingKey struct {
+	size, offset  uint64 // size is rounded up to a multiple of 0x1000
+	buildIDOrFile string // build ID if set, else file name, else empty
+}
+
+// mapLine returns src with its function remapped into the merged profile.
+func (pm *profileMerger) mapLine(src Line) Line {
+	return Line{
+		Function: pm.mapFunction(src.Function),
+		Line:     src.Line,
+	}
+}
+
+// mapFunction returns the merged profile's copy of src, creating it on first use.
+func (pm *profileMerger) mapFunction(src *Function) *Function {
+	if src == nil {
+		return nil
+	}
+	if f, ok := pm.functionsByID[src.ID]; ok {
+		return f
+	}
+	k := src.key()
+	f, known := pm.functions[k]
+	if !known {
+		f = &Function{
+			ID:         uint64(len(pm.p.Function) + 1),
+			Name:       src.Name,
+			SystemName: src.SystemName,
+			Filename:   src.Filename,
+			StartLine:  src.StartLine,
+		}
+		pm.functions[k] = f
+		pm.p.Function = append(pm.p.Function, f)
+	}
+	pm.functionsByID[src.ID] = f
+	return f
+}
+
+// key returns the functionKey that identifies f in the merger's maps.
+func (f *Function) key() functionKey {
+	return functionKey{
+		startLine:  f.StartLine,
+		name:       f.Name,
+		systemName: f.SystemName,
+		fileName:   f.Filename,
+	}
+}
+
+type functionKey struct {
+	startLine                  int64 // Function.ID is not part of the key
+	name, systemName, fileName string
+}
+
+// combineHeaders checks that all profiles can be merged and returns a new
+// profile holding their combined header fields. srcs must not be empty.
+func combineHeaders(srcs []*Profile) (*Profile, error) {
+	for _, s := range srcs[1:] {
+		if err := srcs[0].compatible(s); err != nil {
+			return nil, err
+		}
+	}
+
+	var timeNanos, durationNanos, period int64
+	var comments []string
+	seenComments := map[string]bool{}
+	var defaultSampleType string
+	for _, s := range srcs {
+		if s.TimeNanos != 0 && (timeNanos == 0 || s.TimeNanos < timeNanos) {
+			timeNanos = s.TimeNanos // earliest nonzero time; zero means "unset" and never wins
+		}
+		durationNanos += s.DurationNanos
+		if period == 0 || period < s.Period {
+			period = s.Period
+		}
+		for _, c := range s.Comments {
+			if seen := seenComments[c]; !seen {
+				comments = append(comments, c)
+				seenComments[c] = true
+			}
+		}
+		if defaultSampleType == "" {
+			defaultSampleType = s.DefaultSampleType // first non-empty one wins
+		}
+	}
+
+	p := &Profile{
+		SampleType: make([]*ValueType, len(srcs[0].SampleType)),
+
+		DropFrames: srcs[0].DropFrames,
+		KeepFrames: srcs[0].KeepFrames,
+
+		TimeNanos:     timeNanos,
+		DurationNanos: durationNanos,
+		PeriodType:    srcs[0].PeriodType,
+		Period:        period,
+
+		Comments:          comments,
+		DefaultSampleType: defaultSampleType,
+	}
+	copy(p.SampleType, srcs[0].SampleType)
+	return p, nil
+}
+
+// compatible determines if two profiles can be compared/merged.
+// It returns nil if the period type and all sample types match pairwise;
+// otherwise it returns an error with details on the incompatibility.
+func (p *Profile) compatible(pb *Profile) error {
+	if !equalValueType(p.PeriodType, pb.PeriodType) {
+		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
+	}
+
+	if len(p.SampleType) != len(pb.SampleType) {
+		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
+	}
+
+	for i := range p.SampleType {
+		if !equalValueType(p.SampleType[i], pb.SampleType[i]) {
+			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
+		}
+	}
+	return nil
+}
+
+// equalValueType reports whether the two value types are semantically
+// equal. It ignores the internal fields used during encode/decode.
+func equalValueType(st1, st2 *ValueType) bool {
+	return st1.Type == st2.Type && st1.Unit == st2.Unit // both arguments must be non-nil
+}
diff --git a/src/internal/profile/profile.go b/src/internal/profile/profile.go
new file mode 100644
index 0000000..29568aa
--- /dev/null
+++ b/src/internal/profile/profile.go
@@ -0,0 +1,613 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package profile provides a representation of
+// github.com/google/pprof/proto/profile.proto and
+// methods to encode/decode/merge profiles in this format.
+package profile
+
+import (
+	"bytes"
+	"compress/gzip"
+	"fmt"
+	"io"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// Profile is an in-memory representation of profile.proto.
+type Profile struct {
+	SampleType        []*ValueType
+	DefaultSampleType string
+	Sample            []*Sample // each sample carries one value per SampleType entry (see CheckValid)
+	Mapping           []*Mapping
+	Location          []*Location
+	Function          []*Function
+	Comments          []string
+
+	DropFrames string // regexp source text; set by addLegacyFrameInfo for legacy profiles
+	KeepFrames string
+
+	TimeNanos     int64
+	DurationNanos int64
+	PeriodType    *ValueType
+	Period        int64
+
+	commentX           []int64 // NOTE(review): the unexported fields look like encode/decode state; confirm
+	dropFramesX        int64
+	keepFramesX        int64
+	stringTable        []string
+	defaultSampleTypeX int64
+}
+
+// ValueType corresponds to Profile.ValueType
+type ValueType struct {
+	Type string // cpu, wall, inuse_space, etc
+	Unit string // seconds, nanoseconds, bytes, etc
+
+	typeX int64 // internal; ignored by equalValueType
+	unitX int64 // internal; ignored by equalValueType
+}
+
+// Sample corresponds to Profile.Sample
+type Sample struct {
+	Location []*Location
+	Value    []int64 // one entry per Profile.SampleType (checked by CheckValid)
+	Label    map[string][]string
+	NumLabel map[string][]int64
+	NumUnit  map[string][]string // units for NumLabel, looked up by the same key
+
+	locationIDX []uint64
+	labelX      []Label
+}
+
+// Label corresponds to Profile.Label
+type Label struct {
+	keyX int64 // NOTE(review): presumably an index into the string table; confirm
+	// Exactly one of the two following values must be set
+	strX int64
+	numX int64 // Integer value for this label
+}
+
+// Mapping corresponds to Profile.Mapping
+type Mapping struct {
+	ID              uint64 // nonzero and unique within a profile (checked by CheckValid)
+	Start           uint64
+	Limit           uint64
+	Offset          uint64
+	File            string
+	BuildID         string
+	HasFunctions    bool
+	HasFilenames    bool
+	HasLineNumbers  bool
+	HasInlineFrames bool
+
+	fileX    int64
+	buildIDX int64
+}
+
+// Location corresponds to Profile.Location
+type Location struct {
+	ID       uint64   // nonzero and unique within a profile (checked by CheckValid)
+	Mapping  *Mapping // may be nil; otherwise must be an entry of Profile.Mapping
+	Address  uint64
+	Line     []Line
+	IsFolded bool
+
+	mappingIDX uint64
+}
+
+// Line corresponds to Profile.Line
+type Line struct {
+	Function *Function // may be nil; otherwise must be an entry of Profile.Function
+	Line     int64     // line number; zeroed by Aggregate when line numbers are dropped
+
+	functionIDX uint64
+}
+
+// Function corresponds to Profile.Function
+type Function struct {
+	ID         uint64 // nonzero and unique within a profile (checked by CheckValid)
+	Name       string
+	SystemName string
+	Filename   string
+	StartLine  int64
+
+	nameX       int64
+	systemNameX int64
+	filenameX   int64
+}
+
+// Parse parses a profile and checks for its validity. The input
+// may be a gzip-compressed encoded protobuf or one of many legacy
+// profile formats which may be unsupported in the future.
+func Parse(r io.Reader) (*Profile, error) {
+	orig, err := io.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+
+	var p *Profile
+	if len(orig) >= 2 && orig[0] == 0x1f && orig[1] == 0x8b { // gzip magic number
+		gz, err := gzip.NewReader(bytes.NewBuffer(orig))
+		if err != nil {
+			return nil, fmt.Errorf("decompressing profile: %v", err)
+		}
+		data, err := io.ReadAll(gz)
+		if err != nil {
+			return nil, fmt.Errorf("decompressing profile: %v", err)
+		}
+		orig = data
+	}
+	if p, err = parseUncompressed(orig); err != nil {
+		if p, err = parseLegacy(orig); err != nil { // fall back to the legacy formats
+			return nil, fmt.Errorf("parsing profile: %v", err)
+		}
+	}
+
+	if err := p.CheckValid(); err != nil {
+		return nil, fmt.Errorf("malformed profile: %v", err)
+	}
+	return p, nil
+}
+
+var errUnrecognized = fmt.Errorf("unrecognized profile format") // lets parseLegacy try the next parser
+var errMalformed = fmt.Errorf("malformed profile format")
+
+// parseLegacy returns the result of the first legacy parser that accepts data.
+func parseLegacy(data []byte) (*Profile, error) {
+	for _, parse := range []func([]byte) (*Profile, error){
+		parseCPU,
+		parseHeap,
+		parseGoCount, // goroutine, threadcreate
+		parseThread,
+		parseContention,
+	} {
+		switch p, err := parse(data); err {
+		case nil:
+			p.setMain()
+			p.addLegacyFrameInfo()
+			return p, nil
+		case errUnrecognized:
+			// Not this format; try the next parser.
+		default:
+			return nil, err
+		}
+	}
+	return nil, errUnrecognized
+}
+
+// parseUncompressed decodes data into a new Profile with unmarshal and
+// then runs the profile's post-decode step on it.
+func parseUncompressed(data []byte) (*Profile, error) {
+	p := new(Profile)
+	err := unmarshal(data, p)
+	if err == nil {
+		err = p.postDecode()
+	}
+	if err != nil {
+		return nil, err
+	}
+	return p, nil
+}
+
+var libRx = regexp.MustCompile(`([.]so$|[.]so[._][0-9]+)`) // ".so" at the end, or ".so." / ".so_" followed by digits
+
+// setMain scans Mapping entries and guesses which entry is main
+// because legacy profiles don't obey the convention of putting main
+// first.
+func (p *Profile) setMain() {
+	for i := 0; i < len(p.Mapping); i++ {
+		file := strings.TrimSpace(strings.ReplaceAll(p.Mapping[i].File, "(deleted)", ""))
+		if len(file) == 0 {
+			continue
+		}
+		if len(libRx.FindStringSubmatch(file)) > 0 { // matches libRx: not the main binary
+			continue
+		}
+		if strings.HasPrefix(file, "[") { // bracketed names are skipped as well
+			continue
+		}
+		// Swap what we guess is main to position 0.
+		p.Mapping[i], p.Mapping[0] = p.Mapping[0], p.Mapping[i]
+		break
+	}
+}
+
+// Write writes the profile as a gzip-compressed marshaled protobuf.
+func (p *Profile) Write(w io.Writer) error {
+	p.preEncode()
+	zw := gzip.NewWriter(w)
+	if _, err := zw.Write(marshal(p)); err != nil {
+		return err
+	}
+	return zw.Close() // Close flushes the stream and writes the footer; report its error
+}
+
+// CheckValid tests whether the profile is valid. It checks that:
+//   - every sample has exactly len(p.SampleType) values
+//   - mapping, function and location IDs are nonzero and unique
+//   - each location's mapping and functions are entries of the profile
+func (p *Profile) CheckValid() error {
+	// Check that sample values are consistent
+	sampleLen := len(p.SampleType)
+	if sampleLen == 0 && len(p.Sample) != 0 {
+		return fmt.Errorf("missing sample type information")
+	}
+	for _, s := range p.Sample {
+		if len(s.Value) != sampleLen {
+			return fmt.Errorf("mismatch: sample has: %d values vs. %d types", len(s.Value), len(p.SampleType))
+		}
+	}
+
+	// Check that all mappings/locations/functions are in the tables
+	// Check that there are no duplicate ids
+	mappings := make(map[uint64]*Mapping, len(p.Mapping))
+	for _, m := range p.Mapping {
+		if m.ID == 0 {
+			return fmt.Errorf("found mapping with reserved ID=0")
+		}
+		if mappings[m.ID] != nil {
+			return fmt.Errorf("multiple mappings with same id: %d", m.ID)
+		}
+		mappings[m.ID] = m
+	}
+	functions := make(map[uint64]*Function, len(p.Function))
+	for _, f := range p.Function {
+		if f.ID == 0 {
+			return fmt.Errorf("found function with reserved ID=0")
+		}
+		if functions[f.ID] != nil {
+			return fmt.Errorf("multiple functions with same id: %d", f.ID)
+		}
+		functions[f.ID] = f
+	}
+	locations := make(map[uint64]*Location, len(p.Location))
+	for _, l := range p.Location {
+		if l.ID == 0 {
+			return fmt.Errorf("found location with reserved id=0")
+		}
+		if locations[l.ID] != nil {
+			return fmt.Errorf("multiple locations with same id: %d", l.ID)
+		}
+		locations[l.ID] = l
+		if m := l.Mapping; m != nil {
+			if m.ID == 0 || mappings[m.ID] != m { // must be the very pointer stored in p.Mapping
+				return fmt.Errorf("inconsistent mapping %p: %d", m, m.ID)
+			}
+		}
+		for _, ln := range l.Line {
+			if f := ln.Function; f != nil {
+				if f.ID == 0 || functions[f.ID] != f { // must be the very pointer stored in p.Function
+					return fmt.Errorf("inconsistent function %p: %d", f, f.ID)
+				}
+			}
+		}
+	}
+	return nil
+}
+
+// Aggregate clears the attributes that were not requested from the
+// profile's mappings, functions and locations, modifying them in
+// place, and then returns the result of CheckValid.
+func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address bool) error {
+	for _, m := range p.Mapping {
+		m.HasInlineFrames = m.HasInlineFrames && inlineFrame
+		m.HasFunctions = m.HasFunctions && function
+		m.HasFilenames = m.HasFilenames && filename
+		m.HasLineNumbers = m.HasLineNumbers && linenumber
+	}
+
+	// Aggregate functions
+	if !function || !filename {
+		for _, f := range p.Function {
+			if !function {
+				f.Name = ""
+				f.SystemName = ""
+			}
+			if !filename {
+				f.Filename = ""
+			}
+		}
+	}
+
+	// Aggregate locations
+	if !inlineFrame || !address || !linenumber {
+		for _, l := range p.Location {
+			if !inlineFrame && len(l.Line) > 1 {
+				l.Line = l.Line[len(l.Line)-1:] // keep only the last Line entry
+			}
+			if !linenumber {
+				for i := range l.Line {
+					l.Line[i].Line = 0
+				}
+			}
+			if !address {
+				l.Address = 0
+			}
+		}
+	}
+
+	return p.CheckValid()
+}
+
+// String returns a multi-line text dump of the profile: the period
+// and time header, then the samples, the locations and the mappings.
+// It is intended mainly for debugging. Labels are emitted in map
+// iteration order, so their order is not stable between calls.
+func (p *Profile) String() string {
+	out := make([]string, 0, len(p.Sample)+len(p.Mapping)+len(p.Location))
+	if p.PeriodType != nil {
+		out = append(out, fmt.Sprintf("PeriodType: %s %s", p.PeriodType.Type, p.PeriodType.Unit))
+	}
+	out = append(out, fmt.Sprintf("Period: %d", p.Period))
+	if p.TimeNanos != 0 {
+		out = append(out, fmt.Sprintf("Time: %v", time.Unix(0, p.TimeNanos)))
+	}
+	if p.DurationNanos != 0 {
+		out = append(out, fmt.Sprintf("Duration: %v", time.Duration(p.DurationNanos)))
+	}
+
+	// Samples: a header naming every value column, then one line per
+	// sample (values, then location IDs) followed by its label lines.
+	out = append(out, "Samples:")
+	var header strings.Builder
+	for _, st := range p.SampleType {
+		fmt.Fprintf(&header, "%s/%s ", st.Type, st.Unit)
+	}
+	out = append(out, strings.TrimSpace(header.String()))
+	const labelIndent = "                "
+	for _, sample := range p.Sample {
+		var line strings.Builder
+		for _, v := range sample.Value {
+			fmt.Fprintf(&line, " %10d", v)
+		}
+		line.WriteString(": ")
+		for _, loc := range sample.Location {
+			fmt.Fprintf(&line, "%d ", loc.ID)
+		}
+		out = append(out, line.String())
+
+		if len(sample.Label) > 0 {
+			labels := labelIndent
+			for key, val := range sample.Label {
+				labels += fmt.Sprintf("%s:%v ", key, val)
+			}
+			out = append(out, labels)
+		}
+		if len(sample.NumLabel) > 0 {
+			labels := labelIndent
+			for key, val := range sample.NumLabel {
+				labels += fmt.Sprintf("%s:%v ", key, val)
+			}
+			out = append(out, labels)
+		}
+	}
+
+	// Locations: ID, address and mapping on the first line, then one
+	// line per (possibly inlined) source line.
+	out = append(out, "Locations")
+	for _, loc := range p.Location {
+		prefix := fmt.Sprintf("%6d: %#x ", loc.ID, loc.Address)
+		if loc.Mapping != nil {
+			prefix += fmt.Sprintf("M=%d ", loc.Mapping.ID)
+		}
+		if len(loc.Line) == 0 {
+			out = append(out, prefix)
+		}
+		for _, ln := range loc.Line {
+			detail := "??"
+			if fn := ln.Function; fn != nil {
+				detail = fmt.Sprintf("%s %s:%d s=%d", fn.Name, fn.Filename, ln.Line, fn.StartLine)
+				if fn.Name != fn.SystemName {
+					detail += "(" + fn.SystemName + ")"
+				}
+			}
+			out = append(out, prefix+detail)
+			// Do not print location details past the first line
+			prefix = "             "
+		}
+	}
+
+	// Mappings: address range, file, build ID and a tag per Has* flag.
+	out = append(out, "Mappings")
+	for _, mapping := range p.Mapping {
+		var flags string
+		if mapping.HasFunctions {
+			flags += "[FN]"
+		}
+		if mapping.HasFilenames {
+			flags += "[FL]"
+		}
+		if mapping.HasLineNumbers {
+			flags += "[LN]"
+		}
+		if mapping.HasInlineFrames {
+			flags += "[IN]"
+		}
+		out = append(out, fmt.Sprintf("%d: %#x/%#x/%#x %s %s %s",
+			mapping.ID,
+			mapping.Start, mapping.Limit, mapping.Offset,
+			mapping.File,
+			mapping.BuildID,
+			flags))
+	}
+
+	return strings.Join(out, "\n") + "\n"
+}
+
+// Merge adds profile pb, with its sample values scaled by ratio r,
+// into profile p. Profiles must be compatible (same Type and
+// SampleType); otherwise the error from Compatible is returned.
+// After merging, mappings, locations and functions are renumbered
+// 1..n and the result of CheckValid is returned.
+// TODO(rsilvera): consider normalizing the profiles based on the
+// total samples collected.
+func (p *Profile) Merge(pb *Profile, r float64) error {
+	if err := p.Compatible(pb); err != nil {
+		return err
+	}
+
+	// All scaling and renumbering below is applied to a copy of pb.
+	src := pb.Copy()
+
+	// Keep the largest of the two periods.
+	if p.Period < src.Period {
+		p.Period = src.Period
+	}
+
+	p.DurationNanos += src.DurationNanos
+
+	// Concatenate each table and renumber its entries from 1 so the
+	// IDs stay unique across both inputs.
+	p.Mapping = append(p.Mapping, src.Mapping...)
+	for i := range p.Mapping {
+		p.Mapping[i].ID = uint64(i + 1)
+	}
+	p.Location = append(p.Location, src.Location...)
+	for i := range p.Location {
+		p.Location[i].ID = uint64(i + 1)
+	}
+	p.Function = append(p.Function, src.Function...)
+	for i := range p.Function {
+		p.Function[i].ID = uint64(i + 1)
+	}
+
+	if r != 1.0 {
+		for _, sample := range src.Sample {
+			for i := range sample.Value {
+				sample.Value[i] = int64(float64(sample.Value[i]) * r)
+			}
+		}
+	}
+	p.Sample = append(p.Sample, src.Sample...)
+	return p.CheckValid()
+}
+
+// Compatible reports whether two profiles can be compared/merged.
+// It returns nil when the period types and all sample types pair up
+// as compatible value types; otherwise it returns an error that
+// describes the incompatibility.
+func (p *Profile) Compatible(pb *Profile) error {
+	if !compatibleValueTypes(p.PeriodType, pb.PeriodType) {
+		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
+	}
+
+	// Sample types must agree in count and, position by position, in
+	// type and unit.
+	mismatch := len(p.SampleType) != len(pb.SampleType)
+	for i := 0; !mismatch && i < len(p.SampleType); i++ {
+		mismatch = !compatibleValueTypes(p.SampleType[i], pb.SampleType[i])
+	}
+	if mismatch {
+		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
+	}
+
+	return nil
+}
+
+// HasFunctions reports whether every location in this profile has a
+// mapping whose HasFunctions flag is set. It is true for a profile
+// with no locations.
+func (p *Profile) HasFunctions() bool {
+	for _, loc := range p.Location {
+		if m := loc.Mapping; m != nil && m.HasFunctions {
+			continue
+		}
+		return false
+	}
+	return true
+}
+
+// HasFileLines reports whether every location in this profile has a
+// mapping with both HasFilenames and HasLineNumbers set. It is true
+// for a profile with no locations.
+func (p *Profile) HasFileLines() bool {
+	for _, loc := range p.Location {
+		if m := loc.Mapping; m != nil && m.HasFilenames && m.HasLineNumbers {
+			continue
+		}
+		return false
+	}
+	return true
+}
+
+// compatibleValueTypes reports whether v1 and v2 have the same Type
+// and Unit. A nil value type is compatible with anything.
+func compatibleValueTypes(v1, v2 *ValueType) bool {
+	if v1 != nil && v2 != nil {
+		return v1.Type == v2.Type && v1.Unit == v2.Unit
+	}
+	return true // No grounds to disqualify.
+}
+
+// Copy makes a fully independent copy of a profile by encoding it
+// and decoding the bytes into a fresh Profile. It panics if the
+// encoded form cannot be decoded again.
+func (p *Profile) Copy() *Profile {
+	p.preEncode()
+	encoded := marshal(p)
+
+	dup := new(Profile)
+	err := unmarshal(encoded, dup)
+	if err == nil {
+		err = dup.postDecode()
+	}
+	if err != nil {
+		panic(err)
+	}
+
+	return dup
+}
+
+// Demangler maps symbol names to a human-readable form. This may
+// include C++ demangling and additional simplification. It receives
+// the list of names to process and returns a map keyed by the
+// original name. Names that are not demangled may be missing from
+// the resulting map.
+type Demangler func(name []string) (map[string]string, error)
+
+// Demangle attempts to demangle and optionally simplify any function
+// names referenced in the profile. The SystemName of every function
+// is passed to d in one call; functions for which d returns an entry
+// get that entry as their Name, all others keep their current Name.
+// If d itself fails, its error is returned and no name is changed.
+func (p *Profile) Demangle(d Demangler) error {
+	// Collect names to demangle.
+	var systemNames []string
+	for i := range p.Function {
+		systemNames = append(systemNames, p.Function[i].SystemName)
+	}
+
+	// Update profile with demangled names.
+	readable, err := d(systemNames)
+	if err != nil {
+		return err
+	}
+	for _, fn := range p.Function {
+		name, found := readable[fn.SystemName]
+		if !found {
+			continue
+		}
+		fn.Name = name
+	}
+	return nil
+}
+
+// Empty reports whether the profile contains no samples, that is,
+// whether p.Sample has length zero.
+func (p *Profile) Empty() bool {
+	return len(p.Sample) == 0
+}
+
+// Scale multiplies all sample values in a profile by a constant.
+// A ratio of exactly 1 leaves the profile untouched.
+func (p *Profile) Scale(ratio float64) {
+	if ratio == 1 {
+		return
+	}
+	perType := make([]float64, len(p.SampleType))
+	for i := range perType {
+		perType[i] = ratio
+	}
+	// ScaleN only fails on a length mismatch, which cannot happen here
+	// because perType is sized from p.SampleType.
+	_ = p.ScaleN(perType)
+}
+
+// ScaleN multiplies the i'th value of every sample by ratios[i],
+// truncating the product to int64. It returns an error when ratios
+// does not have one entry per sample type. Values whose ratio is
+// exactly 1 are left untouched.
+func (p *Profile) ScaleN(ratios []float64) error {
+	if want, got := len(p.SampleType), len(ratios); want != got {
+		return fmt.Errorf("mismatched scale ratios, got %d, want %d", got, want)
+	}
+
+	// Nothing to do when every ratio is the identity.
+	needsScaling := false
+	for _, ratio := range ratios {
+		if ratio != 1 {
+			needsScaling = true
+			break
+		}
+	}
+	if !needsScaling {
+		return nil
+	}
+
+	for _, sample := range p.Sample {
+		for i := range sample.Value {
+			if ratio := ratios[i]; ratio != 1 {
+				sample.Value[i] = int64(float64(sample.Value[i]) * ratio)
+			}
+		}
+	}
+	return nil
+}
diff --git a/src/internal/profile/profile_test.go b/src/internal/profile/profile_test.go
new file mode 100644
index 0000000..e1963f3
--- /dev/null
+++ b/src/internal/profile/profile_test.go
@@ -0,0 +1,79 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package profile
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestEmptyProfile(t *testing.T) {
+	var buf bytes.Buffer
+	p, err := Parse(&buf)
+	if err != nil {
+		t.Error("Want no error, got", err)
+	}
+	if p == nil {
+		t.Fatal("Want a valid profile, got <nil>")
+	}
+	if !p.Empty() {
+		t.Errorf("Profile should be empty, got %#v", p)
+	}
+}
+
+// TestParseContention feeds parseContention well-formed and malformed
+// inputs and checks only whether an error is reported.
+func TestParseContention(t *testing.T) {
+	cases := []struct {
+		name    string
+		in      string
+		wantErr bool
+	}{
+		{
+			name: "valid",
+			in: `--- mutex:
+cycles/second=3491920901
+sampling period=1
+43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31
+34035731690 15760 @ 0x45e851 0x45f764 0x4a2b17 0x44ea31
+`,
+		},
+		{
+			name: "valid with comment",
+			in: `--- mutex:
+cycles/second=3491920901
+sampling period=1
+43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31
+#	0x45e850	sync.(*Mutex).Unlock+0x80	/go/src/sync/mutex.go:126
+#	0x45f763	sync.(*RWMutex).Unlock+0x83	/go/src/sync/rwmutex.go:125
+#	0x4a2be0	main.main.func3+0x70		/go/src/internal/pprof/profile/a_binary.go:58
+
+34035731690 15760 @ 0x45e851 0x45f764 0x4a2b17 0x44ea31
+#	0x45e850	sync.(*Mutex).Unlock+0x80	/go/src/sync/mutex.go:126
+#	0x45f763	sync.(*RWMutex).Unlock+0x83	/go/src/sync/rwmutex.go:125
+#	0x4a2b16	main.main.func2+0xd6		/go/src/internal/pprof/profile/a_binary.go:48
+`,
+		},
+		{
+			name:    "empty",
+			in:      `--- mutex:`,
+			wantErr: true,
+		},
+		{
+			name: "invalid header",
+			in: `--- channel:
+43227965305 1659640 @ 0x45e851 0x45f764 0x4a2be1 0x44ea31`,
+			wantErr: true,
+		},
+	}
+	for _, c := range cases {
+		_, err := parseContention([]byte(c.in))
+		failed := err != nil
+		switch {
+		case c.wantErr && !failed:
+			t.Errorf("parseContention(%q) succeeded unexpectedly", c.name)
+		case !c.wantErr && failed:
+			t.Errorf("parseContention(%q) failed unexpectedly: %v", c.name, err)
+		}
+	}
+
+}
diff --git a/src/internal/profile/proto.go b/src/internal/profile/proto.go
new file mode 100644
index 0000000..52cf1ef
--- /dev/null
+++ b/src/internal/profile/proto.go
@@ -0,0 +1,363 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file is a simple protocol buffer encoder and decoder.
+//
+// A protocol message must implement the message interface:
+//   decoder() []decoder
+//   encode(*buffer)
+//
+// The decoder method returns a slice indexed by field number that gives the
+// function to decode that field.
+// The encode method encodes its receiver into the given buffer.
+//
+// The two methods are simple enough to be implemented by hand rather than
+// by using a protocol compiler.
+//
+// See profile.go for examples of messages implementing this interface.
+//
+// There is no support for groups, message sets, or "has" bits.
+
+package profile
+
+import (
+	"errors"
+	"fmt"
+)
+
+// buffer holds the state shared by the encoding and decoding helpers
+// in this file.
+type buffer struct {
+	field int      // field number of the field last read by decodeField
+	typ   int      // wire type of that field
+	u64   uint64   // value of that field for wire types 0, 1 and 5
+	data  []byte   // payload of a wire type 2 field when decoding; output bytes when encoding
+	tmp   [16]byte // scratch space used to move a length header in front of its payload
+}
+
+// decoder stores the field currently held in the buffer into the
+// given message, or returns an error if it cannot.
+type decoder func(*buffer, message) error
+
+// message is implemented by every type that can be encoded with
+// marshal and decoded with unmarshal.
+type message interface {
+	// decoder returns the per-field decode functions, indexed by
+	// field number; nil entries and out-of-range fields are skipped.
+	decoder() []decoder
+	// encode appends the encoded message to the buffer.
+	encode(*buffer)
+}
+
+func marshal(m message) []byte {
+	var b buffer
+	m.encode(&b)
+	return b.data
+}
+
+// encodeVarint appends x to b.data as a base-128 varint: seven bits
+// per byte, least significant group first, with the high bit set on
+// every byte except the last.
+func encodeVarint(b *buffer, x uint64) {
+	for ; x >= 128; x >>= 7 {
+		b.data = append(b.data, byte(x)|0x80)
+	}
+	b.data = append(b.data, byte(x))
+}
+
+func encodeLength(b *buffer, tag int, len int) {
+	encodeVarint(b, uint64(tag)<<3|2)
+	encodeVarint(b, uint64(len))
+}
+
+func encodeUint64(b *buffer, tag int, x uint64) {
+	// append varint to b.data
+	encodeVarint(b, uint64(tag)<<3|0)
+	encodeVarint(b, x)
+}
+
+// encodeUint64s appends a repeated uint64 field. Up to two values are
+// written as individual varint fields; longer slices use the packed
+// encoding (one length-delimited field holding all the varints).
+func encodeUint64s(b *buffer, tag int, x []uint64) {
+	if len(x) <= 2 {
+		for _, v := range x {
+			encodeUint64(b, tag, v)
+		}
+		return
+	}
+
+	// Use packed encoding. The payload size is only known once it has
+	// been written, so the header is appended after it and then
+	// rotated to the front using b.tmp as scratch space.
+	start := len(b.data)
+	for _, v := range x {
+		encodeVarint(b, v)
+	}
+	payloadEnd := len(b.data)
+	encodeLength(b, tag, payloadEnd-start)
+	headerLen := len(b.data) - payloadEnd
+	copy(b.tmp[:], b.data[payloadEnd:])
+	copy(b.data[start+headerLen:], b.data[start:payloadEnd])
+	copy(b.data[start:], b.tmp[:headerLen])
+}
+
+// encodeUint64Opt appends x as a varint field unless it is zero.
+func encodeUint64Opt(b *buffer, tag int, x uint64) {
+	if x != 0 {
+		encodeUint64(b, tag, x)
+	}
+}
+
+// encodeInt64 appends x as a varint field, reinterpreting the signed
+// value as its two's-complement uint64 bit pattern.
+func encodeInt64(b *buffer, tag int, x int64) {
+	encodeUint64(b, tag, uint64(x))
+}
+
+// encodeInt64Opt appends x as a varint field unless it is zero.
+func encodeInt64Opt(b *buffer, tag int, x int64) {
+	if x != 0 {
+		encodeInt64(b, tag, x)
+	}
+}
+
+// encodeInt64s appends a repeated int64 field. Up to two values are
+// written as individual varint fields; longer slices use the packed
+// encoding (one length-delimited field holding all the varints).
+func encodeInt64s(b *buffer, tag int, x []int64) {
+	if len(x) <= 2 {
+		for _, v := range x {
+			encodeInt64(b, tag, v)
+		}
+		return
+	}
+
+	// Use packed encoding. The payload size is only known once it has
+	// been written, so the header is appended after it and then
+	// rotated to the front using b.tmp as scratch space.
+	start := len(b.data)
+	for _, v := range x {
+		encodeVarint(b, uint64(v))
+	}
+	payloadEnd := len(b.data)
+	encodeLength(b, tag, payloadEnd-start)
+	headerLen := len(b.data) - payloadEnd
+	copy(b.tmp[:], b.data[payloadEnd:])
+	copy(b.data[start+headerLen:], b.data[start:payloadEnd])
+	copy(b.data[start:], b.tmp[:headerLen])
+}
+
+// encodeString appends x as a length-delimited field: the header
+// carrying the byte length, then the raw bytes of the string.
+func encodeString(b *buffer, tag int, x string) {
+	size := len(x)
+	encodeLength(b, tag, size)
+	b.data = append(b.data, x...)
+}
+
+// encodeStrings appends every element of x, in order, as its own
+// string field with the same tag.
+func encodeStrings(b *buffer, tag int, x []string) {
+	for i := range x {
+		encodeString(b, tag, x[i])
+	}
+}
+
+// encodeStringOpt appends x as a string field unless it is empty.
+func encodeStringOpt(b *buffer, tag int, x string) {
+	if x != "" {
+		encodeString(b, tag, x)
+	}
+}
+
+func encodeBool(b *buffer, tag int, x bool) {
+	if x {
+		encodeUint64(b, tag, 1)
+	} else {
+		encodeUint64(b, tag, 0)
+	}
+}
+
+// encodeBoolOpt appends x as a bool field only when it is true; a
+// false value is omitted from the output.
+func encodeBoolOpt(b *buffer, tag int, x bool) {
+	if !x {
+		return
+	}
+	encodeBool(b, tag, x)
+}
+
+// encodeMessage appends m as a length-delimited field. The payload
+// size is only known once m has been encoded, so the header is
+// appended after the payload and then rotated to the front using
+// b.tmp as scratch space.
+func encodeMessage(b *buffer, tag int, m message) {
+	start := len(b.data)
+	m.encode(b)
+	payloadEnd := len(b.data)
+	encodeLength(b, tag, payloadEnd-start)
+	headerLen := len(b.data) - payloadEnd
+	copy(b.tmp[:], b.data[payloadEnd:])
+	copy(b.data[start+headerLen:], b.data[start:payloadEnd])
+	copy(b.data[start:], b.tmp[:headerLen])
+}
+
+// unmarshal decodes data into m. The whole input is treated as the
+// payload of a length-delimited field, which is what decodeMessage
+// expects.
+func unmarshal(data []byte, m message) (err error) {
+	top := &buffer{data: data, typ: 2}
+	return decodeMessage(top, m)
+}
+
+func le64(p []byte) uint64 {
+	return uint64(p[0]) | uint64(p[1])<<8 | uint64(p[2])<<16 | uint64(p[3])<<24 | uint64(p[4])<<32 | uint64(p[5])<<40 | uint64(p[6])<<48 | uint64(p[7])<<56
+}
+
+func le32(p []byte) uint32 {
+	return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+}
+
+// decodeVarint reads one base-128 varint from the front of data and
+// returns its value together with the remaining bytes. It returns an
+// error if data ends before the final byte of the varint or if the
+// varint is longer than ten bytes.
+func decodeVarint(data []byte) (uint64, []byte, error) {
+	var value uint64
+	for i, shift := 0, uint(0); i < 10 && i < len(data); i, shift = i+1, shift+7 {
+		c := data[i]
+		value |= uint64(c&0x7F) << shift
+		if c&0x80 == 0 {
+			// High bit clear: this was the last byte of the varint.
+			return value, data[i+1:], nil
+		}
+	}
+	return 0, nil, errors.New("bad varint")
+}
+
+// decodeField reads one field (key plus value) from the front of data
+// into b and returns the bytes that follow it. The field number and
+// wire type are stored in b.field and b.typ. Varint, 64-bit and
+// 32-bit values are stored in b.u64; the payload of a
+// length-delimited field is stored in b.data. Any other wire type,
+// or input that ends too early, is an error.
+func decodeField(b *buffer, data []byte) ([]byte, error) {
+	key, data, err := decodeVarint(data)
+	if err != nil {
+		return nil, err
+	}
+	b.field, b.typ = int(key>>3), int(key&7)
+	b.data, b.u64 = nil, 0
+
+	switch b.typ {
+	case 0: // varint
+		if b.u64, data, err = decodeVarint(data); err != nil {
+			return nil, err
+		}
+	case 1: // fixed 64-bit
+		if len(data) < 8 {
+			return nil, errors.New("not enough data")
+		}
+		b.u64, data = le64(data[:8]), data[8:]
+	case 2: // length-delimited
+		var size uint64
+		if size, data, err = decodeVarint(data); err != nil {
+			return nil, err
+		}
+		if size > uint64(len(data)) {
+			return nil, errors.New("too much data")
+		}
+		b.data, data = data[:size], data[size:]
+	case 5: // fixed 32-bit
+		if len(data) < 4 {
+			return nil, errors.New("not enough data")
+		}
+		b.u64, data = uint64(le32(data[:4])), data[4:]
+	default:
+		return nil, fmt.Errorf("unknown wire type: %d", b.typ)
+	}
+
+	return data, nil
+}
+
+// checkType returns an error unless the field currently held in b
+// has wire type typ.
+func checkType(b *buffer, typ int) error {
+	if b.typ == typ {
+		return nil
+	}
+	return errors.New("type mismatch")
+}
+
+func decodeMessage(b *buffer, m message) error {
+	if err := checkType(b, 2); err != nil {
+		return err
+	}
+	dec := m.decoder()
+	data := b.data
+	for len(data) > 0 {
+		// pull varint field# + type
+		var err error
+		data, err = decodeField(b, data)
+		if err != nil {
+			return err
+		}
+		if b.field >= len(dec) || dec[b.field] == nil {
+			continue
+		}
+		if err := dec[b.field](b, m); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// decodeInt64 stores the varint field held in b into *x as a signed
+// value. It fails if the field does not have wire type 0.
+func decodeInt64(b *buffer, x *int64) error {
+	err := checkType(b, 0)
+	if err == nil {
+		*x = int64(b.u64)
+	}
+	return err
+}
+
+// decodeInt64s appends the value or values of the field held in b to
+// *x. A length-delimited field is read as a packed run of varints;
+// anything else must be a single varint field.
+func decodeInt64s(b *buffer, x *[]int64) error {
+	if b.typ != 2 {
+		var single int64
+		if err := decodeInt64(b, &single); err != nil {
+			return err
+		}
+		*x = append(*x, single)
+		return nil
+	}
+
+	// Packed encoding
+	for rest := b.data; len(rest) > 0; {
+		u, tail, err := decodeVarint(rest)
+		if err != nil {
+			return err
+		}
+		*x = append(*x, int64(u))
+		rest = tail
+	}
+	return nil
+}
+
+// decodeUint64 stores the varint field held in b into *x. It fails if
+// the field does not have wire type 0.
+func decodeUint64(b *buffer, x *uint64) error {
+	err := checkType(b, 0)
+	if err == nil {
+		*x = b.u64
+	}
+	return err
+}
+
+// decodeUint64s appends the value or values of the field held in b to
+// *x. A length-delimited field is read as a packed run of varints;
+// anything else must be a single varint field.
+func decodeUint64s(b *buffer, x *[]uint64) error {
+	if b.typ != 2 {
+		var single uint64
+		if err := decodeUint64(b, &single); err != nil {
+			return err
+		}
+		*x = append(*x, single)
+		return nil
+	}
+
+	// Packed encoding
+	for rest := b.data; len(rest) > 0; {
+		u, tail, err := decodeVarint(rest)
+		if err != nil {
+			return err
+		}
+		*x = append(*x, u)
+		rest = tail
+	}
+	return nil
+}
+
+// decodeString stores the payload of the length-delimited field held
+// in b into *x. It fails if the field does not have wire type 2.
+func decodeString(b *buffer, x *string) error {
+	err := checkType(b, 2)
+	if err == nil {
+		*x = string(b.data)
+	}
+	return err
+}
+
+// decodeStrings decodes the string field held in b and appends it to
+// *x. On error *x is left unchanged.
+func decodeStrings(b *buffer, x *[]string) error {
+	var elem string
+	err := decodeString(b, &elem)
+	if err == nil {
+		*x = append(*x, elem)
+	}
+	return err
+}
+
+// decodeBool stores the varint field held in b into *x: zero decodes
+// to false and any other value to true. It fails if the field does
+// not have wire type 0.
+func decodeBool(b *buffer, x *bool) error {
+	if err := checkType(b, 0); err != nil {
+		return err
+	}
+	*x = int64(b.u64) != 0
+	return nil
+}
diff --git a/src/internal/profile/proto_test.go b/src/internal/profile/proto_test.go
new file mode 100644
index 0000000..c2613fc
--- /dev/null
+++ b/src/internal/profile/proto_test.go
@@ -0,0 +1,67 @@
+package profile
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestPackedEncoding checks that repeated integer fields encode to
+// the expected bytes (packed for more than two elements, unpacked
+// otherwise) and that decoding those bytes restores the slices.
+func TestPackedEncoding(t *testing.T) {
+
+	type testcase struct {
+		uint64s []uint64
+		int64s  []int64
+		encoded []byte
+	}
+	cases := []testcase{
+		{
+			[]uint64{0, 1, 10, 100, 1000, 10000},
+			[]int64{1000, 0, 1000},
+			[]byte{10, 8, 0, 1, 10, 100, 232, 7, 144, 78, 18, 5, 232, 7, 0, 232, 7},
+		},
+		{
+			[]uint64{10000},
+			nil,
+			[]byte{8, 144, 78},
+		},
+		{
+			nil,
+			[]int64{-10000},
+			[]byte{16, 240, 177, 255, 255, 255, 255, 255, 255, 255, 1},
+		},
+	}
+	for i, tc := range cases {
+		// Encode and compare against the expected wire bytes.
+		source := &packedInts{tc.uint64s, tc.int64s}
+		got := marshal(source)
+		if !reflect.DeepEqual(got, tc.encoded) {
+			t.Errorf("failed encode %d, got %v, want %v", i, got, tc.encoded)
+		}
+
+		// Decode the expected bytes and compare both slices.
+		dest := new(packedInts)
+		if err := unmarshal(tc.encoded, dest); err != nil {
+			t.Errorf("failed decode %d: %v", i, err)
+			continue
+		}
+		if !reflect.DeepEqual(dest.uint64s, tc.uint64s) {
+			t.Errorf("failed decode uint64s %d, got %v, want %v", i, dest.uint64s, tc.uint64s)
+		}
+		if !reflect.DeepEqual(dest.int64s, tc.int64s) {
+			t.Errorf("failed decode int64s %d, got %v, want %v", i, dest.int64s, tc.int64s)
+		}
+	}
+}
+
+// packedInts is a test message with two repeated integer fields,
+// encoded as field 1 (uint64s) and field 2 (int64s).
+type packedInts struct {
+	uint64s []uint64
+	int64s  []int64
+}
+
+// decoder returns the field decoders for packedInts: field 1 fills
+// uint64s and field 2 fills int64s. Field 0 has no decoder.
+func (u *packedInts) decoder() []decoder {
+	decodeField1 := func(b *buffer, m message) error {
+		dst := m.(*packedInts)
+		return decodeUint64s(b, &dst.uint64s)
+	}
+	decodeField2 := func(b *buffer, m message) error {
+		dst := m.(*packedInts)
+		return decodeInt64s(b, &dst.int64s)
+	}
+	return []decoder{nil, decodeField1, decodeField2}
+}
+
+// encode writes uint64s as field 1 followed by int64s as field 2.
+func (u *packedInts) encode(b *buffer) {
+	encodeUint64s(b, 1, u.uint64s)
+	encodeInt64s(b, 2, u.int64s)
+}
diff --git a/src/internal/profile/prune.go b/src/internal/profile/prune.go
new file mode 100644
index 0000000..1924fad
--- /dev/null
+++ b/src/internal/profile/prune.go
@@ -0,0 +1,97 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implements methods to remove frames from profiles.
+
+package profile
+
+import (
+	"fmt"
+	"regexp"
+)
+
+// Prune removes all nodes beneath a node matching dropRx, and not
+// matching keepRx. If the root node of a Sample matches, the sample
+// will have an empty stack.
+func (p *Profile) Prune(dropRx, keepRx *regexp.Regexp) {
+	prune := make(map[uint64]bool)
+	pruneBeneath := make(map[uint64]bool)
+
+	for _, loc := range p.Location {
+		var i int
+		for i = len(loc.Line) - 1; i >= 0; i-- {
+			if fn := loc.Line[i].Function; fn != nil && fn.Name != "" {
+				funcName := fn.Name
+				// Account for leading '.' on the PPC ELF v1 ABI.
+				if funcName[0] == '.' {
+					funcName = funcName[1:]
+				}
+				if dropRx.MatchString(funcName) {
+					if keepRx == nil || !keepRx.MatchString(funcName) {
+						break
+					}
+				}
+			}
+		}
+
+		if i >= 0 {
+			// Found matching entry to prune.
+			pruneBeneath[loc.ID] = true
+
+			// Remove the matching location.
+			if i == len(loc.Line)-1 {
+				// Matched the top entry: prune the whole location.
+				prune[loc.ID] = true
+			} else {
+				loc.Line = loc.Line[i+1:]
+			}
+		}
+	}
+
+	// Prune locs from each Sample
+	for _, sample := range p.Sample {
+		// Scan from the root to the leaves to find the prune location.
+		// Do not prune frames before the first user frame, to avoid
+		// pruning everything.
+		foundUser := false
+		for i := len(sample.Location) - 1; i >= 0; i-- {
+			id := sample.Location[i].ID
+			if !prune[id] && !pruneBeneath[id] {
+				foundUser = true
+				continue
+			}
+			if !foundUser {
+				continue
+			}
+			if prune[id] {
+				sample.Location = sample.Location[i+1:]
+				break
+			}
+			if pruneBeneath[id] {
+				sample.Location = sample.Location[i:]
+				break
+			}
+		}
+	}
+}
+
+// RemoveUninteresting prunes the profile using its own DropFrames
+// and KeepFrames regular expressions. Each pattern is anchored so
+// that it must match a whole function name. Nothing is pruned when
+// DropFrames is empty, and KeepFrames is only consulted when
+// DropFrames is set. If a pattern fails to compile, the returned
+// error wraps the regexp error so callers can inspect it with
+// errors.Is or errors.As.
+func (p *Profile) RemoveUninteresting() error {
+	if p.DropFrames == "" {
+		return nil
+	}
+
+	drop, err := regexp.Compile("^(" + p.DropFrames + ")$")
+	if err != nil {
+		return fmt.Errorf("failed to compile regexp %s: %w", p.DropFrames, err)
+	}
+
+	// keep stays nil when no KeepFrames pattern is set; Prune treats a
+	// nil keep expression as "keep nothing".
+	var keep *regexp.Regexp
+	if p.KeepFrames != "" {
+		if keep, err = regexp.Compile("^(" + p.KeepFrames + ")$"); err != nil {
+			return fmt.Errorf("failed to compile regexp %s: %w", p.KeepFrames, err)
+		}
+	}
+
+	p.Prune(drop, keep)
+	return nil
+}
diff --git a/src/internal/race/doc.go b/src/internal/race/doc.go
new file mode 100644
index 0000000..8fa44ce
--- /dev/null
+++ b/src/internal/race/doc.go
@@ -0,0 +1,11 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package race contains helper functions for manually instrumenting code for the race detector.
+
+The runtime package intentionally exports these functions only in the race build;
+this package exports them unconditionally but without the "race" build tag they are no-ops.
+*/
+package race
diff --git a/src/internal/race/norace.go b/src/internal/race/norace.go
new file mode 100644
index 0000000..67b1305
--- /dev/null
+++ b/src/internal/race/norace.go
@@ -0,0 +1,43 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !race
+// +build !race
+
+package race
+
+import (
+	"unsafe"
+)
+
+// Enabled is false in builds without the "race" build tag.
+const Enabled = false
+
+// Acquire is a no-op in builds without the "race" build tag.
+func Acquire(addr unsafe.Pointer) {
+}
+
+// Release is a no-op in builds without the "race" build tag.
+func Release(addr unsafe.Pointer) {
+}
+
+// ReleaseMerge is a no-op in builds without the "race" build tag.
+func ReleaseMerge(addr unsafe.Pointer) {
+}
+
+// Disable is a no-op in builds without the "race" build tag.
+func Disable() {
+}
+
+// Enable is a no-op in builds without the "race" build tag.
+func Enable() {
+}
+
+// Read is a no-op in builds without the "race" build tag.
+func Read(addr unsafe.Pointer) {
+}
+
+// Write is a no-op in builds without the "race" build tag.
+func Write(addr unsafe.Pointer) {
+}
+
+// ReadRange is a no-op in builds without the "race" build tag.
+func ReadRange(addr unsafe.Pointer, len int) {
+}
+
+// WriteRange is a no-op in builds without the "race" build tag.
+func WriteRange(addr unsafe.Pointer, len int) {
+}
+
+// Errors always returns 0 in builds without the "race" build tag.
+func Errors() int { return 0 }
diff --git a/src/internal/race/race.go b/src/internal/race/race.go
new file mode 100644
index 0000000..40f2c99
--- /dev/null
+++ b/src/internal/race/race.go
@@ -0,0 +1,55 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build race
+// +build race
+
+package race
+
+import (
+	"runtime"
+	"unsafe"
+)
+
+// Enabled is true in builds with the "race" build tag.
+const Enabled = true
+
+// Acquire forwards to runtime.RaceAcquire.
+func Acquire(addr unsafe.Pointer) {
+	runtime.RaceAcquire(addr)
+}
+
+// Release forwards to runtime.RaceRelease.
+func Release(addr unsafe.Pointer) {
+	runtime.RaceRelease(addr)
+}
+
+// ReleaseMerge forwards to runtime.RaceReleaseMerge.
+func ReleaseMerge(addr unsafe.Pointer) {
+	runtime.RaceReleaseMerge(addr)
+}
+
+// Disable forwards to runtime.RaceDisable.
+func Disable() {
+	runtime.RaceDisable()
+}
+
+// Enable forwards to runtime.RaceEnable.
+func Enable() {
+	runtime.RaceEnable()
+}
+
+// Read forwards to runtime.RaceRead.
+func Read(addr unsafe.Pointer) {
+	runtime.RaceRead(addr)
+}
+
+// Write forwards to runtime.RaceWrite.
+func Write(addr unsafe.Pointer) {
+	runtime.RaceWrite(addr)
+}
+
+// ReadRange forwards to runtime.RaceReadRange.
+func ReadRange(addr unsafe.Pointer, len int) {
+	runtime.RaceReadRange(addr, len)
+}
+
+// WriteRange forwards to runtime.RaceWriteRange.
+func WriteRange(addr unsafe.Pointer, len int) {
+	runtime.RaceWriteRange(addr, len)
+}
+
+// Errors returns the result of runtime.RaceErrors.
+func Errors() int {
+	return runtime.RaceErrors()
+}
diff --git a/src/internal/reflectlite/all_test.go b/src/internal/reflectlite/all_test.go
new file mode 100644
index 0000000..e15f364
--- /dev/null
+++ b/src/internal/reflectlite/all_test.go
@@ -0,0 +1,1033 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectlite_test
+
+import (
+	"encoding/base64"
+	"fmt"
+	. "internal/reflectlite"
+	"math"
+	"reflect"
+	"runtime"
+	"testing"
+	"unsafe"
+)
+
+// ToValue converts a reflectlite Value into a reflect.Value by going
+// through the interface value it holds.
+func ToValue(v Value) reflect.Value {
+	iface := ToInterface(v)
+	return reflect.ValueOf(iface)
+}
+
+// TypeString returns the %T formatting of the zero value of t.
+func TypeString(t Type) string {
+	zero := ToInterface(Zero(t))
+	return fmt.Sprintf("%T", zero)
+}
+
+// integer is a named int type used to check that type strings carry
+// the package qualifier.
+type integer int
+
+// T is a small struct mixing basic field kinds and a pointer, used
+// by the valueToString tests.
+type T struct {
+	a int
+	b float64
+	c string
+	d *int
+}
+
+// pair couples a test input value with the string expected for it.
+type pair struct {
+	i interface{}
+	s string
+}
+
+// assert reports a test error, attributed to the caller, when s
+// differs from want.
+func assert(t *testing.T, s, want string) {
+	t.Helper()
+	if s == want {
+		return
+	}
+	t.Errorf("have %#q want %#q", s, want)
+}
+
+// typeTests is the table for TestTypes. Each entry holds a struct
+// value whose first field has the type under test, together with the
+// string expected for that field's type.
+var typeTests = []pair{
+	{struct{ x int }{}, "int"},
+	{struct{ x int8 }{}, "int8"},
+	{struct{ x int16 }{}, "int16"},
+	{struct{ x int32 }{}, "int32"},
+	{struct{ x int64 }{}, "int64"},
+	{struct{ x uint }{}, "uint"},
+	{struct{ x uint8 }{}, "uint8"},
+	{struct{ x uint16 }{}, "uint16"},
+	{struct{ x uint32 }{}, "uint32"},
+	{struct{ x uint64 }{}, "uint64"},
+	{struct{ x float32 }{}, "float32"},
+	{struct{ x float64 }{}, "float64"},
+	{struct{ x int8 }{}, "int8"},
+	{struct{ x (**int8) }{}, "**int8"},
+	{struct{ x (**integer) }{}, "**reflectlite_test.integer"},
+	{struct{ x ([32]int32) }{}, "[32]int32"},
+	{struct{ x ([]int8) }{}, "[]int8"},
+	{struct{ x (map[string]int32) }{}, "map[string]int32"},
+	{struct{ x (chan<- string) }{}, "chan<- string"},
+	{struct {
+		x struct {
+			c chan *int32
+			d float32
+		}
+	}{},
+		"struct { c chan *int32; d float32 }",
+	},
+	{struct{ x (func(a int8, b int32)) }{}, "func(int8, int32)"},
+	{struct {
+		x struct {
+			c func(chan *integer, *int8)
+		}
+	}{},
+		"struct { c func(chan *reflectlite_test.integer, *int8) }",
+	},
+	{struct {
+		x struct {
+			a int8
+			b int32
+		}
+	}{},
+		"struct { a int8; b int32 }",
+	},
+	{struct {
+		x struct {
+			a int8
+			b int8
+			c int32
+		}
+	}{},
+		"struct { a int8; b int8; c int32 }",
+	},
+	{struct {
+		x struct {
+			a int8
+			b int8
+			c int8
+			d int32
+		}
+	}{},
+		"struct { a int8; b int8; c int8; d int32 }",
+	},
+	{struct {
+		x struct {
+			a int8
+			b int8
+			c int8
+			d int8
+			e int32
+		}
+	}{},
+		"struct { a int8; b int8; c int8; d int8; e int32 }",
+	},
+	{struct {
+		x struct {
+			a int8
+			b int8
+			c int8
+			d int8
+			e int8
+			f int32
+		}
+	}{},
+		"struct { a int8; b int8; c int8; d int8; e int8; f int32 }",
+	},
+	{struct {
+		x struct {
+			a int8 `reflect:"hi there"`
+		}
+	}{},
+		`struct { a int8 "reflect:\"hi there\"" }`,
+	},
+	{struct {
+		x struct {
+			a int8 `reflect:"hi \x00there\t\n\"\\"`
+		}
+	}{},
+		`struct { a int8 "reflect:\"hi \\x00there\\t\\n\\\"\\\\\"" }`,
+	},
+	{struct {
+		x struct {
+			f func(args ...int)
+		}
+	}{},
+		"struct { f func(...int) }",
+	},
+	// The interface case below is disabled in the original source.
+	// {struct {
+	// 	x (interface {
+	// 		a(func(func(int) int) func(func(int)) int)
+	// 		b()
+	// 	})
+	// }{},
+	// 	"interface { reflectlite_test.a(func(func(int) int) func(func(int)) int); reflectlite_test.b() }",
+	// },
+	{struct {
+		x struct {
+			int32
+			int64
+		}
+	}{},
+		"struct { int32; int64 }",
+	},
+}
+
+// valueTests is the table for TestSetValue. Each entry holds a
+// pointer to a zero value and the string valueToString is expected
+// to produce for the pointee after TestSetValue has assigned a new
+// value to it (for the basic kinds) or left it at its zero value.
+var valueTests = []pair{
+	{new(int), "132"},
+	{new(int8), "8"},
+	{new(int16), "16"},
+	{new(int32), "32"},
+	{new(int64), "64"},
+	{new(uint), "132"},
+	{new(uint8), "8"},
+	{new(uint16), "16"},
+	{new(uint32), "32"},
+	{new(uint64), "64"},
+	{new(float32), "256.25"},
+	{new(float64), "512.125"},
+	{new(complex64), "532.125+10i"},
+	{new(complex128), "564.25+1i"},
+	{new(string), "stringy cheese"},
+	{new(bool), "true"},
+	{new(*int8), "*int8(0)"},
+	{new(**int8), "**int8(0)"},
+	{new([5]int32), "[5]int32{0, 0, 0, 0, 0}"},
+	{new(**integer), "**reflectlite_test.integer(0)"},
+	{new(map[string]int32), "map[string]int32{<can't iterate on maps>}"},
+	{new(chan<- string), "chan<- string"},
+	{new(func(a int8, b int32)), "func(int8, int32)(arg)"},
+	{new(struct {
+		c chan *int32
+		d float32
+	}),
+		"struct { c chan *int32; d float32 }{chan *int32, 0}",
+	},
+	{new(struct{ c func(chan *integer, *int8) }),
+		"struct { c func(chan *reflectlite_test.integer, *int8) }{func(chan *reflectlite_test.integer, *int8)(arg)}",
+	},
+	{new(struct {
+		a int8
+		b int32
+	}),
+		"struct { a int8; b int32 }{0, 0}",
+	},
+	{new(struct {
+		a int8
+		b int8
+		c int32
+	}),
+		"struct { a int8; b int8; c int32 }{0, 0, 0}",
+	},
+}
+
+// testType reports an error for test case i when the string form of
+// typ differs from want.
+func testType(t *testing.T, i int, typ Type, want string) {
+	have := TypeString(typ)
+	if have == want {
+		return
+	}
+	t.Errorf("#%d: have %#q, want %#q", i, have, want)
+}
+
+// testReflectType reports an error for test case i when the string
+// form of typ differs from want.
+func testReflectType(t *testing.T, i int, typ Type, want string) {
+	have := TypeString(typ)
+	if have == want {
+		return
+	}
+	t.Errorf("#%d: have %#q, want %#q", i, have, want)
+}
+
+// TestTypes checks, for every entry of typeTests, that the type of
+// the first field of the struct value formats as the expected string.
+func TestTypes(t *testing.T) {
+	for i, tt := range typeTests {
+		testReflectType(t, i, Field(ValueOf(tt.i), 0).Type(), tt.s)
+	}
+}
+
+func TestSetValue(t *testing.T) {
+	for i, tt := range valueTests {
+		v := ValueOf(tt.i).Elem()
+		switch v.Kind() {
+		case Int:
+			v.Set(ValueOf(int(132)))
+		case Int8:
+			v.Set(ValueOf(int8(8)))
+		case Int16:
+			v.Set(ValueOf(int16(16)))
+		case Int32:
+			v.Set(ValueOf(int32(32)))
+		case Int64:
+			v.Set(ValueOf(int64(64)))
+		case Uint:
+			v.Set(ValueOf(uint(132)))
+		case Uint8:
+			v.Set(ValueOf(uint8(8)))
+		case Uint16:
+			v.Set(ValueOf(uint16(16)))
+		case Uint32:
+			v.Set(ValueOf(uint32(32)))
+		case Uint64:
+			v.Set(ValueOf(uint64(64)))
+		case Float32:
+			v.Set(ValueOf(float32(256.25)))
+		case Float64:
+			v.Set(ValueOf(512.125))
+		case Complex64:
+			v.Set(ValueOf(complex64(532.125 + 10i)))
+		case Complex128:
+			v.Set(ValueOf(complex128(564.25 + 1i)))
+		case String:
+			v.Set(ValueOf("stringy cheese"))
+		case Bool:
+			v.Set(ValueOf(true))
+		}
+		s := valueToString(v)
+		if s != tt.s {
+			t.Errorf("#%d: have %#q, want %#q", i, s, tt.s)
+		}
+	}
+}
+
+// TestCanSetField checks CanSet on fields reached through embedded
+// structs. Each case gives a path of field indexes to follow from
+// the root value (dereferencing pointers on the way) and whether the
+// field found there is expected to be settable.
+func TestCanSetField(t *testing.T) {
+	type embed struct{ x, X int }
+	type Embed struct{ x, X int }
+	type S1 struct {
+		embed
+		x, X int
+	}
+	type S2 struct {
+		*embed
+		x, X int
+	}
+	type S3 struct {
+		Embed
+		x, X int
+	}
+	type S4 struct {
+		*Embed
+		x, X int
+	}
+
+	type testCase struct {
+		index  []int
+		canSet bool
+	}
+	suites := []struct {
+		val   Value
+		cases []testCase
+	}{{
+		val: ValueOf(&S1{}),
+		cases: []testCase{
+			{[]int{0}, false},
+			{[]int{0, 0}, false},
+			{[]int{0, 1}, true},
+			{[]int{1}, false},
+			{[]int{2}, true},
+		},
+	}, {
+		val: ValueOf(&S2{embed: &embed{}}),
+		cases: []testCase{
+			{[]int{0}, false},
+			{[]int{0, 0}, false},
+			{[]int{0, 1}, true},
+			{[]int{1}, false},
+			{[]int{2}, true},
+		},
+	}, {
+		val: ValueOf(&S3{}),
+		cases: []testCase{
+			{[]int{0}, true},
+			{[]int{0, 0}, false},
+			{[]int{0, 1}, true},
+			{[]int{1}, false},
+			{[]int{2}, true},
+		},
+	}, {
+		val: ValueOf(&S4{Embed: &Embed{}}),
+		cases: []testCase{
+			{[]int{0}, true},
+			{[]int{0, 0}, false},
+			{[]int{0, 1}, true},
+			{[]int{1}, false},
+			{[]int{2}, true},
+		},
+	}}
+
+	for _, suite := range suites {
+		root, cases := suite.val, suite.cases
+		t.Run(root.Type().Name(), func(t *testing.T) {
+			for _, c := range cases {
+				// Walk the index path, stepping through pointers.
+				cur := root
+				for _, idx := range c.index {
+					if cur.Kind() == Ptr {
+						cur = cur.Elem()
+					}
+					cur = Field(cur, idx)
+				}
+				if got := cur.CanSet(); got != c.canSet {
+					t.Errorf("CanSet() = %v, want %v", got, c.canSet)
+				}
+			}
+		})
+	}
+}
+
+// _i is a package-level int whose address is stored in the T literal of
+// valueToStringTests below.
+var _i = 7
+
+// valueToStringTests pairs input values with the text that
+// TestValueToString expects valueToString to produce for them.
+var valueToStringTests = []pair{
+	{123, "123"},
+	{123.5, "123.5"},
+	{byte(123), "123"},
+	{"abc", "abc"},
+	{T{123, 456.75, "hello", &_i}, "reflectlite_test.T{123, 456.75, hello, *int(&7)}"},
+	{new(chan *T), "*chan *reflectlite_test.T(&chan *reflectlite_test.T)"},
+	{[10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, "[10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}"},
+	{&[10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, "*[10]int(&[10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})"},
+	{[]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, "[]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}"},
+	{&[]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, "*[]int(&[]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})"},
+}
+
+// TestValueToString checks valueToString against every entry of
+// valueToStringTests.
+func TestValueToString(t *testing.T) {
+	for idx := range valueToStringTests {
+		tc := valueToStringTests[idx]
+		got := valueToString(ValueOf(tc.i))
+		if got == tc.s {
+			continue
+		}
+		t.Errorf("#%d: have %#q, want %#q", idx, got, tc.s)
+	}
+}
+
+// TestPtrSetNil checks that setting a pointer variable to the zero Value of
+// its type leaves the variable nil.
+func TestPtrSetNil(t *testing.T) {
+	var n int32 = 1234
+	ptr := &n
+	slot := ValueOf(&ptr).Elem()
+	slot.Set(Zero(slot.Type()))
+	if ptr == nil {
+		return
+	}
+	t.Errorf("got non-nil (%d), want nil", *ptr)
+}
+
+// TestMapSetNil checks that setting a map variable to the zero Value of its
+// type leaves the variable nil.
+func TestMapSetNil(t *testing.T) {
+	table := make(map[string]int)
+	slot := ValueOf(&table).Elem()
+	slot.Set(Zero(slot.Type()))
+	if table == nil {
+		return
+	}
+	t.Errorf("got non-nil (%p), want nil", table)
+}
+
+func TestAll(t *testing.T) {
+	testType(t, 1, TypeOf((int8)(0)), "int8")
+	testType(t, 2, TypeOf((*int8)(nil)).Elem(), "int8")
+
+	typ := TypeOf((*struct {
+		c chan *int32
+		d float32
+	})(nil))
+	testType(t, 3, typ, "*struct { c chan *int32; d float32 }")
+	etyp := typ.Elem()
+	testType(t, 4, etyp, "struct { c chan *int32; d float32 }")
+}
+
+// TestInterfaceValue stores a float64 in an interface-typed struct field and
+// checks that both Elem and ToInterface expose the float64 inside it.
+func TestInterfaceValue(t *testing.T) {
+	var holder struct {
+		E interface{}
+	}
+	holder.E = 123.456
+	field := Field(ValueOf(&holder).Elem(), 0)
+	// assert(t, TypeString(field.Type()), "interface {}")
+	assert(t, TypeString(field.Elem().Type()), "float64")
+
+	iface := ToInterface(field)
+	if _, isFloat := iface.(float64); !isFloat {
+		t.Error("v2.Interface() did not return float64, got ", TypeOf(iface))
+	}
+}
+
+// TestFunctionValue checks that a func value round-trips through ValueOf and
+// ToInterface (compared via its printed form) and reports type "func()".
+func TestFunctionValue(t *testing.T) {
+	var fn interface{} = func() {}
+	v := ValueOf(fn)
+	got, want := fmt.Sprint(ToInterface(v)), fmt.Sprint(fn)
+	if got != want {
+		t.Fatalf("TestFunction returned wrong pointer")
+	}
+	assert(t, TypeString(v.Type()), "func()")
+}
+
+// appendTests pairs a base slice (length 2, capacity 4) with extra elements
+// to add: the first extra fits in the spare capacity, the second exceeds it.
+var appendTests = []struct {
+	orig, extra []int
+}{
+	{make([]int, 2, 4), []int{22}},
+	{make([]int, 2, 4), []int{22, 33, 44}},
+}
+
+// sameInts reports whether x and y have the same length and hold equal
+// elements at every index.
+func sameInts(x, y []int) bool {
+	if len(x) != len(y) {
+		return false
+	}
+	for i := 0; i < len(x); i++ {
+		if x[i] != y[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// TestBigUnnamedStruct checks that a four-word unnamed struct survives a
+// round trip through ValueOf and ToInterface unchanged.
+func TestBigUnnamedStruct(t *testing.T) {
+	b := struct{ a, b, c, d int64 }{1, 2, 3, 4}
+	v := ValueOf(b)
+	b1 := ToInterface(v).(struct {
+		a, b, c, d int64
+	})
+	if b1.a != b.a || b1.b != b.b || b1.c != b.c || b1.d != b.d {
+		// Name the assertion that was actually performed above.
+		t.Errorf("ValueOf(%v).Interface().(struct{ a, b, c, d int64 }) = %v", b, b1)
+	}
+}
+
+// big is a named struct of five int64 fields used by TestBigStruct.
+type big struct {
+	a, b, c, d, e int64
+}
+
+// TestBigStruct checks that a big value survives a round trip through
+// ValueOf and ToInterface unchanged.
+func TestBigStruct(t *testing.T) {
+	b := big{1, 2, 3, 4, 5}
+	b1 := ToInterface(ValueOf(b)).(big)
+	// big has only int64 fields, so == compares it field by field.
+	if b1 != b {
+		t.Errorf("ValueOf(%v).Interface().(big) = %v", b, b1)
+	}
+}
+
+// Basic is a small struct with one int and one float32 field.
+type Basic struct {
+	x int
+	y float32
+}
+
+// NotBasic is a distinct named type with the same underlying struct as Basic.
+type NotBasic Basic
+
+// DeepEqualTest is a table entry holding two values and a boolean eq.
+type DeepEqualTest struct {
+	a, b interface{}
+	eq   bool
+}
+
+// Simple functions for DeepEqual tests: two nil func values and one
+// non-nil closure.
+var (
+	fn1 func()             // nil.
+	fn2 func()             // nil.
+	fn3 = func() { fn1() } // Not nil.
+)
+
+// self is an empty struct type used as a table value in typeOfTests.
+type self struct{}
+
+// Loop is a pointer type whose element type is itself.
+type Loop *Loop
+
+// Loopy is an empty interface type used to build interface-based cycles.
+type Loopy interface{}
+
+var loop1, loop2 Loop
+var loopy1, loopy2 Loopy
+
+// init makes loop1/loop2 and loopy1/loopy2 refer to each other, so each
+// pair forms a reference cycle.
+func init() {
+	loop1 = &loop2
+	loop2 = &loop1
+
+	loopy1 = &loopy2
+	loopy2 = &loopy1
+}
+
+// typeOfTests is a table of DeepEqualTest entries. TestTypeOf reads only
+// the a field of each entry; the b and eq fields are not consulted there.
+var typeOfTests = []DeepEqualTest{
+	// Equalities
+	{nil, nil, true},
+	{1, 1, true},
+	{int32(1), int32(1), true},
+	{0.5, 0.5, true},
+	{float32(0.5), float32(0.5), true},
+	{"hello", "hello", true},
+	{make([]int, 10), make([]int, 10), true},
+	{&[3]int{1, 2, 3}, &[3]int{1, 2, 3}, true},
+	{Basic{1, 0.5}, Basic{1, 0.5}, true},
+	{error(nil), error(nil), true},
+	{map[int]string{1: "one", 2: "two"}, map[int]string{2: "two", 1: "one"}, true},
+	{fn1, fn2, true},
+
+	// Inequalities
+	{1, 2, false},
+	{int32(1), int32(2), false},
+	{0.5, 0.6, false},
+	{float32(0.5), float32(0.6), false},
+	{"hello", "hey", false},
+	{make([]int, 10), make([]int, 11), false},
+	{&[3]int{1, 2, 3}, &[3]int{1, 2, 4}, false},
+	{Basic{1, 0.5}, Basic{1, 0.6}, false},
+	{Basic{1, 0}, Basic{2, 0}, false},
+	{map[int]string{1: "one", 3: "two"}, map[int]string{2: "two", 1: "one"}, false},
+	{map[int]string{1: "one", 2: "txo"}, map[int]string{2: "two", 1: "one"}, false},
+	{map[int]string{1: "one"}, map[int]string{2: "two", 1: "one"}, false},
+	{map[int]string{2: "two", 1: "one"}, map[int]string{1: "one"}, false},
+	{nil, 1, false},
+	{1, nil, false},
+	{fn1, fn3, false},
+	{fn3, fn3, false},
+	{[][]int{{1}}, [][]int{{2}}, false},
+	{math.NaN(), math.NaN(), false},
+	{&[1]float64{math.NaN()}, &[1]float64{math.NaN()}, false},
+	{&[1]float64{math.NaN()}, self{}, true},
+	{[]float64{math.NaN()}, []float64{math.NaN()}, false},
+	{[]float64{math.NaN()}, self{}, true},
+	{map[float64]float64{math.NaN(): 1}, map[float64]float64{1: 2}, false},
+	{map[float64]float64{math.NaN(): 1}, self{}, true},
+
+	// Nil vs empty: not the same.
+	{[]int{}, []int(nil), false},
+	{[]int{}, []int{}, true},
+	{[]int(nil), []int(nil), true},
+	{map[int]int{}, map[int]int(nil), false},
+	{map[int]int{}, map[int]int{}, true},
+	{map[int]int(nil), map[int]int(nil), true},
+
+	// Mismatched types
+	{1, 1.0, false},
+	{int32(1), int64(1), false},
+	{0.5, "hello", false},
+	{[]int{1, 2, 3}, [3]int{1, 2, 3}, false},
+	{&[3]interface{}{1, 2, 4}, &[3]interface{}{1, 2, "s"}, false},
+	{Basic{1, 0.5}, NotBasic{1, 0.5}, false},
+	{map[uint]string{1: "one", 2: "two"}, map[int]string{2: "two", 1: "one"}, false},
+
+	// Possible loops.
+	{&loop1, &loop1, true},
+	{&loop1, &loop2, true},
+	{&loopy1, &loopy1, true},
+	{&loopy1, &loopy2, true},
+}
+
+// TestTypeOf checks that TypeOf(nil) is nil and that, for every valid value
+// in typeOfTests, TypeOf agrees with ValueOf(...).Type().
+func TestTypeOf(t *testing.T) {
+	// A nil interface value has no type.
+	if typ := TypeOf(nil); typ != nil {
+		t.Errorf("expected nil type for nil value; got %v", typ)
+	}
+	for _, tc := range typeOfTests {
+		val := ValueOf(tc.a)
+		if val.IsValid() {
+			if typ := TypeOf(tc.a); typ != val.Type() {
+				t.Errorf("TypeOf(%v) = %v, but ValueOf(%v).Type() = %v", tc.a, typ, tc.a, val.Type())
+			}
+		}
+	}
+}
+
+// Nil reports a test error unless the first field of the struct a is nil.
+// It panics, via Field or IsNil, when a is not a struct or when the field's
+// kind does not support IsNil.
+func Nil(a interface{}, t *testing.T) {
+	// Attribute failures to the calling line rather than to this helper.
+	t.Helper()
+	n := Field(ValueOf(a), 0)
+	if !n.IsNil() {
+		t.Errorf("%v should be nil", a)
+	}
+}
+
+// NotNil reports a test error if the first field of the struct a is nil.
+// The message names a's type rather than printing its value.
+func NotNil(a interface{}, t *testing.T) {
+	// Attribute failures to the calling line rather than to this helper.
+	t.Helper()
+	n := Field(ValueOf(a), 0)
+	if n.IsNil() {
+		t.Errorf("value of type %v should not be nil", TypeString(ValueOf(a).Type()))
+	}
+}
+
+// TestIsNil checks that IsNil can be called on zero Values of every nilable
+// field kind listed below, and that Nil/NotNil see the expected state before
+// and after a field is assigned.
+func TestIsNil(t *testing.T) {
+	// Every field type below supports IsNil. Each value is wrapped in a
+	// struct so the interface type stays hidden.
+	nilable := []interface{}{
+		struct{ x *int }{},
+		struct{ x interface{} }{},
+		struct{ x map[string]int }{},
+		struct{ x func() bool }{},
+		struct{ x chan int }{},
+		struct{ x []string }{},
+		struct{ x unsafe.Pointer }{},
+	}
+	for idx := range nilable {
+		fieldType := TField(TypeOf(nilable[idx]), 0)
+		Zero(fieldType).IsNil() // panics if not okay to call
+	}
+
+	// Check the implementations: each holder starts out nil and becomes
+	// non-nil once its field is assigned.
+	var ptrHolder struct {
+		x *int
+	}
+	Nil(ptrHolder, t)
+	ptrHolder.x = new(int)
+	NotNil(ptrHolder, t)
+
+	var sliceHolder struct {
+		x []int
+	}
+	Nil(sliceHolder, t)
+	sliceHolder.x = make([]int, 10)
+	NotNil(sliceHolder, t)
+
+	var chanHolder struct {
+		x chan int
+	}
+	Nil(chanHolder, t)
+	chanHolder.x = make(chan int)
+	NotNil(chanHolder, t)
+
+	var mapHolder struct {
+		x map[int]int
+	}
+	Nil(mapHolder, t)
+	mapHolder.x = make(map[int]int)
+	NotNil(mapHolder, t)
+
+	var ifaceHolder struct {
+		x interface{}
+	}
+	Nil(ifaceHolder, t)
+	ifaceHolder.x = 2
+	NotNil(ifaceHolder, t)
+
+	var funcHolder struct {
+		x func(t *testing.T)
+	}
+	Nil(funcHolder, t)
+	funcHolder.x = TestIsNil
+	NotNil(funcHolder, t)
+}
+
+// Indirect returns the value that v points to.
+// A non-pointer v is returned unchanged; for a pointer the result is
+// v.Elem(), which is a zero Value when the pointer is nil.
+func Indirect(v Value) Value {
+	if v.Kind() == Ptr {
+		return v.Elem()
+	}
+	return v
+}
+
+// TestNilPtrValueSub checks that Elem of a nil pointer Value is invalid.
+func TestNilPtrValueSub(t *testing.T) {
+	var nilPtr *int
+	elem := ValueOf(nilPtr).Elem()
+	if elem.IsValid() {
+		t.Error("ValueOf((*int)(nil)).Elem().IsValid()")
+	}
+}
+
+// Point is a pair of integer coordinates with a handful of exported
+// methods. The index comments below give each method's position in
+// alphabetical order.
+type Point struct {
+	x, y int
+}
+
+// This will be index 0.
+// AnotherMethod ignores its receiver and argument and returns -1.
+func (p Point) AnotherMethod(scale int) int {
+	return -1
+}
+
+// This will be index 1.
+// Dist returns the squared distance of p from the origin, times scale.
+func (p Point) Dist(scale int) int {
+	//println("Point.Dist", p.x, p.y, scale)
+	xx := p.x * p.x * scale
+	yy := p.y * p.y * scale
+	return xx + yy
+}
+
+// This will be index 2.
+// GCMethod forces a garbage collection and then returns k plus p.x.
+func (p Point) GCMethod(k int) int {
+	runtime.GC()
+	sum := k + p.x
+	return sum
+}
+
+// This will be index 3.
+// NoArgs does nothing.
+func (p Point) NoArgs() {
+	// Exercise no-argument/no-result paths.
+}
+
+// This will be index 4.
+// TotalDist returns the sum of the squared distances from p to each of
+// points.
+func (p Point) TotalDist(points ...Point) int {
+	total := 0
+	for i := range points {
+		dx, dy := points[i].x-p.x, points[i].y-p.y
+		// Should call Sqrt, but it's just a test.
+		total += dx*dx + dy*dy
+	}
+	return total
+}
+
+// D1 is a named struct with a single int field; nameTests uses it as a
+// named element type.
+type D1 struct {
+	d int
+}
+
+// D2 has the same layout as D1 but is a distinct named type.
+type D2 struct {
+	d int
+}
+
+func TestImportPath(t *testing.T) {
+	tests := []struct {
+		t    Type
+		path string
+	}{
+		{TypeOf(&base64.Encoding{}).Elem(), "encoding/base64"},
+		{TypeOf(int(0)), ""},
+		{TypeOf(int8(0)), ""},
+		{TypeOf(int16(0)), ""},
+		{TypeOf(int32(0)), ""},
+		{TypeOf(int64(0)), ""},
+		{TypeOf(uint(0)), ""},
+		{TypeOf(uint8(0)), ""},
+		{TypeOf(uint16(0)), ""},
+		{TypeOf(uint32(0)), ""},
+		{TypeOf(uint64(0)), ""},
+		{TypeOf(uintptr(0)), ""},
+		{TypeOf(float32(0)), ""},
+		{TypeOf(float64(0)), ""},
+		{TypeOf(complex64(0)), ""},
+		{TypeOf(complex128(0)), ""},
+		{TypeOf(byte(0)), ""},
+		{TypeOf(rune(0)), ""},
+		{TypeOf([]byte(nil)), ""},
+		{TypeOf([]rune(nil)), ""},
+		{TypeOf(string("")), ""},
+		{TypeOf((*interface{})(nil)).Elem(), ""},
+		{TypeOf((*byte)(nil)), ""},
+		{TypeOf((*rune)(nil)), ""},
+		{TypeOf((*int64)(nil)), ""},
+		{TypeOf(map[string]int{}), ""},
+		{TypeOf((*error)(nil)).Elem(), ""},
+		{TypeOf((*Point)(nil)), ""},
+		{TypeOf((*Point)(nil)).Elem(), "internal/reflectlite_test"},
+	}
+	for _, test := range tests {
+		if path := test.t.PkgPath(); path != test.path {
+			t.Errorf("%v.PkgPath() = %q, want %q", test.t, path, test.path)
+		}
+	}
+}
+
+// noAlloc runs f under testing.AllocsPerRun with n measured runs and reports
+// a test error if the average allocation count is above zero. f receives a
+// counter that starts at -1 and increases by one on every call. The check
+// is skipped in short mode and when GOMAXPROCS is greater than 1.
+func noAlloc(t *testing.T, n int, f func(int)) {
+	// Attribute skips and failures to the calling line, not this helper.
+	t.Helper()
+	if testing.Short() {
+		t.Skip("skipping malloc count in short mode")
+	}
+	if runtime.GOMAXPROCS(0) > 1 {
+		t.Skip("skipping; GOMAXPROCS>1")
+	}
+	i := -1
+	allocs := testing.AllocsPerRun(n, func() {
+		f(i)
+		i++
+	})
+	if allocs > 0 {
+		t.Errorf("%d iterations: got %v mallocs, want 0", n, allocs)
+	}
+}
+
+// TestAllocations checks, via noAlloc, that wrapping a func value with
+// ValueOf, unwrapping it with ToInterface and calling it does not allocate.
+func TestAllocations(t *testing.T) {
+	noAlloc(t, 100, func(j int) {
+		var i interface{}
+		var v Value
+
+		// We can uncomment this when compiler escape analysis
+		// is good enough to see that the integer assigned to i
+		// does not escape and therefore need not be allocated.
+		//
+		// i = 42 + j
+		// v = ValueOf(i)
+		// if int(v.Int()) != 42+j {
+		// 	panic("wrong int")
+		// }
+
+		// The function literal captures nothing from the enclosing scope.
+		i = func(j int) int { return j }
+		v = ValueOf(i)
+		if ToInterface(v).(func(int) int)(j) != j {
+			panic("wrong result")
+		}
+	})
+}
+
+// TestSetPanic checks, field by field, which Set calls on a nested struct
+// succeed and which panic. Lines wrapped in ok must not panic; lines wrapped
+// in bad (shouldPanic) must. The trailing comment on each line names the
+// field path being cleared.
+func TestSetPanic(t *testing.T) {
+	ok := func(f func()) { f() }
+	bad := shouldPanic
+	// clear overwrites v with the zero value of its own type.
+	clear := func(v Value) { v.Set(Zero(v.Type())) }
+
+	type t0 struct {
+		W int
+	}
+
+	type t1 struct {
+		Y int
+		t0
+	}
+
+	type T2 struct {
+		Z       int
+		namedT0 t0
+	}
+
+	type T struct {
+		X int
+		t1
+		T2
+		NamedT1 t1
+		NamedT2 T2
+		namedT1 t1
+		namedT2 T2
+	}
+
+	// not addressable
+	// Every Set on a value obtained from a non-pointer T is expected to panic.
+	v := ValueOf(T{})
+	bad(func() { clear(Field(v, 0)) })                     // .X
+	bad(func() { clear(Field(v, 1)) })                     // .t1
+	bad(func() { clear(Field(Field(v, 1), 0)) })           // .t1.Y
+	bad(func() { clear(Field(Field(v, 1), 1)) })           // .t1.t0
+	bad(func() { clear(Field(Field(Field(v, 1), 1), 0)) }) // .t1.t0.W
+	bad(func() { clear(Field(v, 2)) })                     // .T2
+	bad(func() { clear(Field(Field(v, 2), 0)) })           // .T2.Z
+	bad(func() { clear(Field(Field(v, 2), 1)) })           // .T2.namedT0
+	bad(func() { clear(Field(Field(Field(v, 2), 1), 0)) }) // .T2.namedT0.W
+	bad(func() { clear(Field(v, 3)) })                     // .NamedT1
+	bad(func() { clear(Field(Field(v, 3), 0)) })           // .NamedT1.Y
+	bad(func() { clear(Field(Field(v, 3), 1)) })           // .NamedT1.t0
+	bad(func() { clear(Field(Field(Field(v, 3), 1), 0)) }) // .NamedT1.t0.W
+	bad(func() { clear(Field(v, 4)) })                     // .NamedT2
+	bad(func() { clear(Field(Field(v, 4), 0)) })           // .NamedT2.Z
+	bad(func() { clear(Field(Field(v, 4), 1)) })           // .NamedT2.namedT0
+	bad(func() { clear(Field(Field(Field(v, 4), 1), 0)) }) // .NamedT2.namedT0.W
+	bad(func() { clear(Field(v, 5)) })                     // .namedT1
+	bad(func() { clear(Field(Field(v, 5), 0)) })           // .namedT1.Y
+	bad(func() { clear(Field(Field(v, 5), 1)) })           // .namedT1.t0
+	bad(func() { clear(Field(Field(Field(v, 5), 1), 0)) }) // .namedT1.t0.W
+	bad(func() { clear(Field(v, 6)) })                     // .namedT2
+	bad(func() { clear(Field(Field(v, 6), 0)) })           // .namedT2.Z
+	bad(func() { clear(Field(Field(v, 6), 1)) })           // .namedT2.namedT0
+	bad(func() { clear(Field(Field(Field(v, 6), 1), 0)) }) // .namedT2.namedT0.W
+
+	// addressable
+	// With an addressable T, the expectations below allow Set on an
+	// exported field even when it is reached through an unexported
+	// embedded struct, but reject Set on an unexported field itself and
+	// on everything beneath an unexported non-embedded field.
+	v = ValueOf(&T{}).Elem()
+	ok(func() { clear(Field(v, 0)) })                      // .X
+	bad(func() { clear(Field(v, 1)) })                     // .t1
+	ok(func() { clear(Field(Field(v, 1), 0)) })            // .t1.Y
+	bad(func() { clear(Field(Field(v, 1), 1)) })           // .t1.t0
+	ok(func() { clear(Field(Field(Field(v, 1), 1), 0)) })  // .t1.t0.W
+	ok(func() { clear(Field(v, 2)) })                      // .T2
+	ok(func() { clear(Field(Field(v, 2), 0)) })            // .T2.Z
+	bad(func() { clear(Field(Field(v, 2), 1)) })           // .T2.namedT0
+	bad(func() { clear(Field(Field(Field(v, 2), 1), 0)) }) // .T2.namedT0.W
+	ok(func() { clear(Field(v, 3)) })                      // .NamedT1
+	ok(func() { clear(Field(Field(v, 3), 0)) })            // .NamedT1.Y
+	bad(func() { clear(Field(Field(v, 3), 1)) })           // .NamedT1.t0
+	ok(func() { clear(Field(Field(Field(v, 3), 1), 0)) })  // .NamedT1.t0.W
+	ok(func() { clear(Field(v, 4)) })                      // .NamedT2
+	ok(func() { clear(Field(Field(v, 4), 0)) })            // .NamedT2.Z
+	bad(func() { clear(Field(Field(v, 4), 1)) })           // .NamedT2.namedT0
+	bad(func() { clear(Field(Field(Field(v, 4), 1), 0)) }) // .NamedT2.namedT0.W
+	bad(func() { clear(Field(v, 5)) })                     // .namedT1
+	bad(func() { clear(Field(Field(v, 5), 0)) })           // .namedT1.Y
+	bad(func() { clear(Field(Field(v, 5), 1)) })           // .namedT1.t0
+	bad(func() { clear(Field(Field(Field(v, 5), 1), 0)) }) // .namedT1.t0.W
+	bad(func() { clear(Field(v, 6)) })                     // .namedT2
+	bad(func() { clear(Field(Field(v, 6), 0)) })           // .namedT2.Z
+	bad(func() { clear(Field(Field(v, 6), 1)) })           // .namedT2.namedT0
+	bad(func() { clear(Field(Field(Field(v, 6), 1), 0)) }) // .namedT2.namedT0.W
+}
+
+// shouldPanic calls f and swallows the panic it raises. If f returns
+// without panicking, shouldPanic itself panics with "did not panic".
+func shouldPanic(f func()) {
+	// recover must be called directly by the deferred function.
+	verify := func() {
+		if r := recover(); r == nil {
+			panic("did not panic")
+		}
+	}
+	defer verify()
+	f()
+}
+
+// S is a struct of two int64 fields.
+type S struct {
+	i1 int64
+	i2 int64
+}
+
+// TestBigZero checks that the zero Value of a 1024-byte array type really
+// contains only zero bytes.
+func TestBigZero(t *testing.T) {
+	const size = 1 << 10
+	var sample [size]byte
+	zeroed := ToInterface(Zero(ValueOf(sample).Type())).([size]byte)
+	for idx, b := range zeroed {
+		if b != 0 {
+			t.Fatalf("Zero object not all zero, index %d", idx)
+		}
+	}
+}
+
+// TestInvalid checks that IsValid and Kind agree: a nil interface field is
+// valid with Kind Interface, and its Elem is invalid with Kind Invalid.
+func TestInvalid(t *testing.T) {
+	// Used to have inconsistency between IsValid() and Kind() != Invalid.
+	type T struct{ v interface{} }
+
+	field := Field(ValueOf(T{}), 0)
+	if !field.IsValid() || field.Kind() != Interface {
+		t.Errorf("field: IsValid=%v, Kind=%v, want true, Interface", field.IsValid(), field.Kind())
+	}
+	elem := field.Elem()
+	if elem.IsValid() || elem.Kind() != Invalid {
+		t.Errorf("field elem: IsValid=%v, Kind=%v, want false, Invalid", elem.IsValid(), elem.Kind())
+	}
+}
+
+// This type has a deliberately long name (255 bytes, according to the name
+// itself) so that, per the name, decoding it needs a second length byte.
+type TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678 int
+
+// nameTest pairs a nil pointer value with the Name expected for the type it
+// points to.
+type nameTest struct {
+	v    interface{}
+	want string
+}
+
+// nameTests lists the cases for TestNames. Named element types expect their
+// own name; unnamed ones (slices, channels, funcs, interface literals)
+// expect the empty string.
+var nameTests = []nameTest{
+	{(*int32)(nil), "int32"},
+	{(*D1)(nil), "D1"},
+	{(*[]D1)(nil), ""},
+	{(*chan D1)(nil), ""},
+	{(*func() D1)(nil), ""},
+	{(*<-chan D1)(nil), ""},
+	{(*chan<- D1)(nil), ""},
+	{(*interface{})(nil), ""},
+	{(*interface {
+		F()
+	})(nil), ""},
+	{(*TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678)(nil), "TheNameOfThisTypeIsExactly255BytesLongSoWhenTheCompilerPrependsTheReflectTestPackageNameAndExtraStarTheLinkerRuntimeAndReflectPackagesWillHaveToCorrectlyDecodeTheSecondLengthByte0123456789_0123456789_0123456789_0123456789_0123456789_012345678"},
+}
+
+// TestNames checks Name on the element type of every nameTests pointer.
+func TestNames(t *testing.T) {
+	for i := range nameTests {
+		tc := nameTests[i]
+		elem := TypeOf(tc.v).Elem()
+		got := elem.Name()
+		if got != tc.want {
+			t.Errorf("%v Name()=%q, want %q", elem, got, tc.want)
+		}
+	}
+}
+
+// TestUnaddressableField checks that Set panics when asked to store a
+// Buffer, a named type from the package under test with an unexported
+// field, into a local struct variable of the same shape declared here.
+//
+// This ensures that unexported fields cannot be modified by other packages.
+func TestUnaddressableField(t *testing.T) {
+	var local struct {
+		buf []byte
+	}
+	var foreign Buffer // declared in the package under test, not in this one
+	dst := ValueOf(&local).Elem()
+	src := ValueOf(foreign)
+	shouldPanic(func() { dst.Set(src) })
+}
+
+// Tint is a named integer type.
+type Tint int
+
+// Tint2 is an alias for Tint, not a new type.
+type Tint2 = Tint
+
+// Talias1 embeds predeclared types, including the alias pairs byte/uint8
+// and int32/rune, so each embedded field is named after the identifier used
+// to declare it.
+type Talias1 struct {
+	byte
+	uint8
+	int
+	int32
+	rune
+}
+
+// Talias2 embeds Tint under both its own name and its alias Tint2.
+type Talias2 struct {
+	Tint
+	Tint2
+}
+
+// TestAliasNames checks that %#v prints embedded fields of Talias1 and
+// Talias2 under the names they were declared with.
+func TestAliasNames(t *testing.T) {
+	first := Talias1{byte: 1, uint8: 2, int: 3, int32: 4, rune: 5}
+	got := fmt.Sprintf("%#v", first)
+	expect := "reflectlite_test.Talias1{byte:0x1, uint8:0x2, int:3, int32:4, rune:5}"
+	if got != expect {
+		t.Errorf("Talias1 print:\nhave: %s\nwant: %s", got, expect)
+	}
+
+	second := Talias2{Tint: 1, Tint2: 2}
+	got = fmt.Sprintf("%#v", second)
+	expect = "reflectlite_test.Talias2{Tint:1, Tint2:2}"
+	if got != expect {
+		t.Errorf("Talias2 print:\nhave: %s\nwant: %s", got, expect)
+	}
+}
diff --git a/src/internal/reflectlite/asm.s b/src/internal/reflectlite/asm.s
new file mode 100644
index 0000000..a7b69b6
--- /dev/null
+++ b/src/internal/reflectlite/asm.s
@@ -0,0 +1,5 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Trigger build without complete flag.
+\ No newline at end of file
diff --git a/src/internal/reflectlite/export_test.go b/src/internal/reflectlite/export_test.go
new file mode 100644
index 0000000..354ea9d
--- /dev/null
+++ b/src/internal/reflectlite/export_test.go
@@ -0,0 +1,115 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectlite
+
+import (
+	"unsafe"
+)
+
+// Field returns the i'th field of the struct v.
+// It panics if v's Kind is not Struct or i is out of range.
+func Field(v Value, i int) Value {
+	if v.kind() != Struct {
+		panic(&ValueError{"reflect.Value.Field", v.kind()})
+	}
+	// The kind check above is what justifies viewing v.typ as a structType.
+	tt := (*structType)(unsafe.Pointer(v.typ))
+	// The unsigned comparison also rejects negative i.
+	if uint(i) >= uint(len(tt.fields)) {
+		panic("reflect: Field index out of range")
+	}
+	field := &tt.fields[i]
+	typ := field.typ
+
+	// Inherit permission bits from v, but clear flagEmbedRO.
+	fl := v.flag&(flagStickyRO|flagIndir|flagAddr) | flag(typ.Kind())
+	// Using an unexported field forces flagRO.
+	if !field.name.isExported() {
+		if field.embedded() {
+			fl |= flagEmbedRO
+		} else {
+			fl |= flagStickyRO
+		}
+	}
+	// Either flagIndir is set and v.ptr points at struct,
+	// or flagIndir is not set and v.ptr is the actual struct data.
+	// In the former case, we want v.ptr + offset.
+	// In the latter case, we must have field.offset = 0,
+	// so v.ptr + field.offset is still the correct address.
+	ptr := add(v.ptr, field.offset(), "same as non-reflect &v.field")
+	return Value{typ, ptr, fl}
+}
+
+// TField returns the type of the i'th field of the struct type typ.
+// It panics if typ is not a struct type or if i is out of range.
+func TField(typ Type, i int) Type {
+	rt := typ.(*rtype)
+	if rt.Kind() == Struct {
+		return StructFieldType((*structType)(unsafe.Pointer(rt)), i)
+	}
+	panic("reflect: Field of non-struct type")
+}
+
+// StructFieldType returns the type of the i'th field of the struct type t.
+// It panics if i is out of bounds.
+func StructFieldType(t *structType, i int) Type {
+	if i >= 0 && i < len(t.fields) {
+		return toType(t.fields[i].typ)
+	}
+	panic("reflect: Field index out of bounds")
+}
+
+// Zero returns a Value holding the zero value of the given type.
+// This is not the same as the zero value of the Value struct, which
+// represents no value at all: Zero(TypeOf(42)), for instance, is a Value of
+// Kind Int holding 0.
+// The returned value is neither addressable nor settable.
+// Zero panics if typ is nil.
+func Zero(typ Type) Value {
+	if typ == nil {
+		panic("reflect: Zero(nil)")
+	}
+	rt := typ.(*rtype)
+	kindFlag := flag(rt.Kind())
+	if !ifaceIndir(rt) {
+		// Stored directly: a nil data word is the zero value.
+		return Value{rt, nil, kindFlag}
+	}
+	// Stored indirectly: point at freshly allocated storage.
+	return Value{rt, unsafe_New(rt), kindFlag | flagIndir}
+}
+
+// ToInterface returns the value currently held by v as an interface{}.
+// It is equivalent to:
+//	var i interface{} = (v's underlying value)
+// It panics if the Value was obtained by accessing
+// unexported struct fields.
+func ToInterface(v Value) (i interface{}) {
+	i = valueInterface(v)
+	return i
+}
+
+// EmbedWithUnexpMeth is an exported empty struct whose only method, f, is
+// unexported.
+type EmbedWithUnexpMeth struct{}
+
+func (EmbedWithUnexpMeth) f() {}
+
+// pinUnexpMeth is an interface that requires the unexported method f.
+type pinUnexpMeth interface {
+	f()
+}
+
+// pinUnexpMethI holds an EmbedWithUnexpMeth converted to pinUnexpMeth.
+// NOTE(review): presumably this keeps method f from being discarded at link
+// time; confirm against the linker's dead-code rules.
+var pinUnexpMethI = pinUnexpMeth(EmbedWithUnexpMeth{})
+
+// FirstMethodNameBytes returns a pointer to the encoded name data of the
+// first method listed in t's uncommon type. It panics if t has no uncommon
+// type, or if bit 2 of the name's first byte (its flag field) is clear,
+// which the panic message describes as the name lacking a pkgPath.
+func FirstMethodNameBytes(t Type) *byte {
+	// Reference pinUnexpMethI so the variable is used from this function.
+	_ = pinUnexpMethI
+
+	ut := t.uncommon()
+	if ut == nil {
+		panic("type has no methods")
+	}
+	m := ut.methods()[0]
+	mname := t.(*rtype).nameOff(m.name)
+	if *mname.data(0, "name flag field")&(1<<2) == 0 {
+		panic("method name does not have pkgPath *string")
+	}
+	return mname.bytes
+}
+
+// Buffer is an exported struct with a single unexported field.
+// TestUnaddressableField uses it as a type declared outside the test
+// package.
+type Buffer struct {
+	buf []byte
+}
diff --git a/src/internal/reflectlite/reflect_mirror_test.go b/src/internal/reflectlite/reflect_mirror_test.go
new file mode 100644
index 0000000..9b28b13
--- /dev/null
+++ b/src/internal/reflectlite/reflect_mirror_test.go
@@ -0,0 +1,132 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectlite_test
+
+import (
+	"fmt"
+	"go/ast"
+	"go/parser"
+	"go/token"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"sync"
+	"testing"
+)
+
+// typeNames lists the struct types whose field sets TestMirrorWithReflect
+// compares between the two parsed packages.
+var typeNames = []string{
+	"rtype",
+	"uncommonType",
+	"arrayType",
+	"chanType",
+	"funcType",
+	"interfaceType",
+	"mapType",
+	"ptrType",
+	"sliceType",
+	"structType",
+}
+
+// visitor is an ast.Visitor that records, for every struct type named in
+// typeNames, the set of fields it declares.
+type visitor struct {
+	// m maps a type name to the set of field names declared in that type.
+	m map[string]map[string]bool
+}
+
+// newVisitor returns a visitor with an empty, ready-to-use map.
+func newVisitor() visitor {
+	v := visitor{}
+	v.m = make(map[string]map[string]bool)
+
+	return v
+}
+
+// filter reports whether name is one of typeNames.
+func (v visitor) filter(name string) bool {
+	for _, typeName := range typeNames {
+		if typeName == name {
+			return true
+		}
+	}
+	return false
+}
+
+// Visit implements ast.Visitor. For a type declaration that passes filter
+// and declares a struct, it stores the struct's field names in v.m under
+// the type's name. It always returns v so the walk continues.
+func (v visitor) Visit(n ast.Node) ast.Visitor {
+	spec, ok := n.(*ast.TypeSpec)
+	if !ok || !v.filter(spec.Name.String()) {
+		return v
+	}
+	st, ok := spec.Type.(*ast.StructType)
+	if !ok {
+		return v
+	}
+	fields := make(map[string]bool)
+	v.m[spec.Name.String()] = fields
+	for _, field := range st.Fields.List {
+		if len(field.Names) == 0 {
+			// Embedded field: key it by the printed form of its type.
+			fields[fmt.Sprintf("%s", field.Type)] = true
+			continue
+		}
+		// One declaration such as "a, b int" names several fields;
+		// record every one of them, not only the first.
+		for _, name := range field.Names {
+			fields[name.Name] = true
+		}
+	}
+	return v
+}
+
+// loadTypes parses every .go file in the directory path and walks the files
+// of the package named pkgName with v. It panics if the directory cannot be
+// parsed or if it contains no package with that name.
+func loadTypes(path, pkgName string, v visitor) {
+	fset := token.NewFileSet()
+
+	filter := func(fi fs.FileInfo) bool {
+		return strings.HasSuffix(fi.Name(), ".go")
+	}
+	pkgs, err := parser.ParseDir(fset, path, filter, 0)
+	if err != nil {
+		panic(err)
+	}
+
+	// Fail with a clear message instead of a nil dereference when the
+	// expected package is absent from the directory.
+	pkg, ok := pkgs[pkgName]
+	if !ok {
+		panic(fmt.Sprintf("package %q not found in %s", pkgName, path))
+	}
+
+	for _, f := range pkg.Files {
+		ast.Walk(v, f)
+	}
+}
+
+// TestMirrorWithReflect parses the sources of this package and of package
+// reflect and checks that the struct types listed in typeNames declare the
+// same fields in both. It is skipped when GOROOT/src/reflect is absent.
+func TestMirrorWithReflect(t *testing.T) {
+	reflectDir := filepath.Join(runtime.GOROOT(), "src", "reflect")
+	if _, err := os.Stat(reflectDir); os.IsNotExist(err) {
+		// On some mobile builders, the test binary executes on a machine without a
+		// complete GOROOT source tree.
+		t.Skipf("GOROOT source not present")
+	}
+
+	var wg sync.WaitGroup
+	rl, r := newVisitor(), newVisitor()
+
+	// Parse both packages concurrently; each goroutine fills its own map.
+	for _, tc := range []struct {
+		path, pkg string
+		v         visitor
+	}{
+		{".", "reflectlite", rl},
+		{reflectDir, "reflect", r},
+	} {
+		tc := tc
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			loadTypes(tc.path, tc.pkg, tc.v)
+		}()
+	}
+	wg.Wait()
+
+	if len(rl.m) != len(r.m) {
+		t.Fatalf("number of types mismatch, reflect: %d, reflectlite: %d", len(r.m), len(rl.m))
+	}
+
+	for typName := range r.m {
+		if len(r.m[typName]) != len(rl.m[typName]) {
+			t.Errorf("type %s number of fields mismatch, reflect: %d, reflectlite: %d", typName, len(r.m[typName]), len(rl.m[typName]))
+			continue
+		}
+		for field := range r.m[typName] {
+			if _, ok := rl.m[typName][field]; !ok {
+				t.Errorf(`type %s field mismatch, reflect has "%s", reflectlite does not.`, typName, field)
+			}
+		}
+	}
+}
diff --git a/src/internal/reflectlite/set_test.go b/src/internal/reflectlite/set_test.go
new file mode 100644
index 0000000..a610499
--- /dev/null
+++ b/src/internal/reflectlite/set_test.go
@@ -0,0 +1,101 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectlite_test
+
+import (
+	"bytes"
+	"go/ast"
+	"go/token"
+	. "internal/reflectlite"
+	"io"
+	"testing"
+)
+
+// TestImplicitSetConversion checks that Set converts a concrete
+// *bytes.Buffer when storing it into an io.Reader variable.
+func TestImplicitSetConversion(t *testing.T) {
+	// Assume TestImplicitMapConversion covered the basics.
+	// Just make sure conversions are being applied at all.
+	var reader io.Reader
+	buf := new(bytes.Buffer)
+	ValueOf(&reader).Elem().Set(ValueOf(buf))
+	if reader == buf {
+		return
+	}
+	t.Errorf("after Set: r=%T(%v)", reader, reader)
+}
+
+// implementsTests lists cases for TestImplements (and, appended to
+// assignableTests, for TestAssignableTo). x and t are pointers whose
+// element types are the types under test; b is the expected result of
+// asking whether x's element type implements t's element type.
+var implementsTests = []struct {
+	x interface{}
+	t interface{}
+	b bool
+}{
+	{new(*bytes.Buffer), new(io.Reader), true},
+	{new(bytes.Buffer), new(io.Reader), false},
+	{new(*bytes.Buffer), new(io.ReaderAt), false},
+	{new(*ast.Ident), new(ast.Expr), true},
+	{new(*notAnExpr), new(ast.Expr), false},
+	{new(*ast.Ident), new(notASTExpr), false},
+	{new(notASTExpr), new(ast.Expr), false},
+	{new(ast.Expr), new(notASTExpr), false},
+	{new(*notAnExpr), new(notASTExpr), true},
+	{new(mapError), new(error), true},
+	{new(*mapError), new(error), true},
+}
+
+// notAnExpr has methods named Pos, End and exprNode, but its exprNode is
+// declared in this package; implementsTests expects it not to implement
+// ast.Expr.
+type notAnExpr struct{}
+
+func (notAnExpr) Pos() token.Pos { return token.NoPos }
+func (notAnExpr) End() token.Pos { return token.NoPos }
+func (notAnExpr) exprNode()      {}
+
+// notASTExpr is an interface declared in this package with the method
+// names Pos, End and exprNode; implementsTests expects *notAnExpr to
+// implement it and *ast.Ident not to.
+type notASTExpr interface {
+	Pos() token.Pos
+	End() token.Pos
+	exprNode()
+}
+
+// mapError is a map type that implements error through a value receiver.
+type mapError map[string]string
+
+func (mapError) Error() string { return "mapError" }
+
+// Compile-time checks that both mapError and *mapError satisfy error.
+var _ error = mapError{}
+var _ error = new(mapError)
+
+// TestImplements checks Type.Implements against every implementsTests case.
+func TestImplements(t *testing.T) {
+	for idx := range implementsTests {
+		tc := implementsTests[idx]
+		impl := TypeOf(tc.x).Elem()
+		iface := TypeOf(tc.t).Elem()
+		got := impl.Implements(iface)
+		if got != tc.b {
+			t.Errorf("(%s).Implements(%s) = %v, want %v", TypeString(impl), TypeString(iface), got, tc.b)
+		}
+	}
+}
+
+// assignableTests lists cases for TestAssignableTo. x and t are pointers
+// whose element types are the types under test; b is the expected result
+// of asking whether x's element type is assignable to t's element type.
+var assignableTests = []struct {
+	x interface{}
+	t interface{}
+	b bool
+}{
+	{new(chan int), new(<-chan int), true},
+	{new(<-chan int), new(chan int), false},
+	{new(*int), new(IntPtr), true},
+	{new(IntPtr), new(*int), true},
+	{new(IntPtr), new(IntPtr1), false},
+	{new(Ch), new(<-chan interface{}), true},
+	// test runs implementsTests too
+}
+
+// IntPtr and IntPtr1 are distinct named types with underlying type *int.
+type IntPtr *int
+type IntPtr1 *int
+
+// Ch is a named receive-only channel type.
+type Ch <-chan interface{}
+
+// TestAssignableTo checks Type.AssignableTo against assignableTests followed
+// by implementsTests.
+func TestAssignableTo(t *testing.T) {
+	cases := append(assignableTests, implementsTests...)
+	for idx := range cases {
+		tc := cases[idx]
+		from := TypeOf(tc.x).Elem()
+		to := TypeOf(tc.t).Elem()
+		got := from.AssignableTo(to)
+		if got != tc.b {
+			t.Errorf("%d:AssignableTo: got %v, want %v", idx, got, tc.b)
+		}
+	}
+}
diff --git a/src/internal/reflectlite/swapper.go b/src/internal/reflectlite/swapper.go
new file mode 100644
index 0000000..6330ab2
--- /dev/null
+++ b/src/internal/reflectlite/swapper.go
@@ -0,0 +1,77 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectlite
+
+import (
+	"internal/unsafeheader"
+	"unsafe"
+)
+
+// Swapper returns a function that swaps the elements in the provided
+// slice.
+//
+// Swapper panics if the provided interface is not a slice.
+func Swapper(slice interface{}) func(i, j int) {
+	v := ValueOf(slice)
+	if v.Kind() != Slice {
+		panic(&ValueError{Method: "Swapper", Kind: v.Kind()})
+	}
+	// Fast path for slices of size 0 and 1. Nothing to swap.
+	switch v.Len() {
+	case 0:
+		// No index is valid for an empty slice.
+		return func(i, j int) { panic("reflect: slice index out of range") }
+	case 1:
+		// Only the pair (0, 0) is valid, and swapping it is a no-op.
+		return func(i, j int) {
+			if i != 0 || j != 0 {
+				panic("reflect: slice index out of range")
+			}
+		}
+	}
+
+	typ := v.Type().Elem().(*rtype)
+	size := typ.Size()
+	hasPtr := typ.ptrdata != 0
+
+	// Some common & small cases, without using memmove:
+	// each one reinterprets the slice as a slice of a fixed element type
+	// and swaps with ordinary indexed assignment.
+	if hasPtr {
+		if size == ptrSize {
+			ps := *(*[]unsafe.Pointer)(v.ptr)
+			return func(i, j int) { ps[i], ps[j] = ps[j], ps[i] }
+		}
+		if typ.Kind() == String {
+			ss := *(*[]string)(v.ptr)
+			return func(i, j int) { ss[i], ss[j] = ss[j], ss[i] }
+		}
+	} else {
+		switch size {
+		case 8:
+			is := *(*[]int64)(v.ptr)
+			return func(i, j int) { is[i], is[j] = is[j], is[i] }
+		case 4:
+			is := *(*[]int32)(v.ptr)
+			return func(i, j int) { is[i], is[j] = is[j], is[i] }
+		case 2:
+			is := *(*[]int16)(v.ptr)
+			return func(i, j int) { is[i], is[j] = is[j], is[i] }
+		case 1:
+			is := *(*[]int8)(v.ptr)
+			return func(i, j int) { is[i], is[j] = is[j], is[i] }
+		}
+	}
+
+	// General case: swap through a scratch element using typedmemmove.
+	s := (*unsafeheader.Slice)(v.ptr)
+	tmp := unsafe_New(typ) // swap scratch space
+
+	return func(i, j int) {
+		// The unsigned comparison also rejects negative indices.
+		if uint(i) >= uint(s.Len) || uint(j) >= uint(s.Len) {
+			panic("reflect: slice index out of range")
+		}
+		val1 := arrayAt(s.Data, i, size, "i < s.Len")
+		val2 := arrayAt(s.Data, j, size, "j < s.Len")
+		typedmemmove(typ, tmp, val1)
+		typedmemmove(typ, val1, val2)
+		typedmemmove(typ, val2, tmp)
+	}
+}
diff --git a/src/internal/reflectlite/tostring_test.go b/src/internal/reflectlite/tostring_test.go
new file mode 100644
index 0000000..a1e5dae
--- /dev/null
+++ b/src/internal/reflectlite/tostring_test.go
@@ -0,0 +1,98 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Formatting of reflection types and values for debugging.
+// Not defined as methods so they do not need to be linked into most binaries;
+// the functions are not used by the library itself, only in tests.
+
+package reflectlite_test
+
+import (
+	. "internal/reflectlite"
+	"reflect"
+	"strconv"
+)
+
+// valueToString returns a textual representation of v. For debugging only.
+func valueToString(v Value) string {
+	std := reflect.ValueOf(ToInterface(v))
+	return valueToStringImpl(std)
+}
+
+// valueToStringImpl renders val, a standard reflect.Value, as text and
+// recurses into pointers, arrays, slices, structs and interfaces.
+func valueToStringImpl(val reflect.Value) string {
+	if !val.IsValid() {
+		return "<zero Value>"
+	}
+	typ := val.Type()
+	switch val.Kind() {
+	// Scalars print as their plain value, without the type name.
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return strconv.FormatInt(val.Int(), 10)
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return strconv.FormatUint(val.Uint(), 10)
+	case reflect.Float32, reflect.Float64:
+		return strconv.FormatFloat(val.Float(), 'g', -1, 64)
+	case reflect.Complex64, reflect.Complex128:
+		re, im := real(val.Complex()), imag(val.Complex())
+		return strconv.FormatFloat(re, 'g', -1, 64) + "+" + strconv.FormatFloat(im, 'g', -1, 64) + "i"
+	case reflect.String:
+		return val.String()
+	case reflect.Bool:
+		if val.Bool() {
+			return "true"
+		}
+		return "false"
+
+	// A pointer prints as T(0) when nil and T(&elem) otherwise.
+	case reflect.Ptr:
+		target := "0"
+		if !val.IsNil() {
+			target = "&" + valueToStringImpl(val.Elem())
+		}
+		return typ.String() + "(" + target + ")"
+
+	// Arrays and slices print as T{e0, e1, ...}.
+	case reflect.Array, reflect.Slice:
+		out := typ.String() + "{"
+		for i := 0; i < val.Len(); i++ {
+			if i > 0 {
+				out += ", "
+			}
+			out += valueToStringImpl(val.Index(i))
+		}
+		return out + "}"
+
+	// Map contents are not printed; only a placeholder is emitted.
+	case reflect.Map:
+		return typ.String() + "{" + "<can't iterate on maps>" + "}"
+
+	// A channel prints as its type only.
+	case reflect.Chan:
+		return typ.String()
+
+	// Structs print as T{f0, f1, ...}, field values only.
+	case reflect.Struct:
+		out := typ.String() + "{"
+		for i, n := 0, val.NumField(); i < n; i++ {
+			if i > 0 {
+				out += ", "
+			}
+			out += valueToStringImpl(val.Field(i))
+		}
+		return out + "}"
+
+	// An interface prints as T(dynamic value).
+	case reflect.Interface:
+		return typ.String() + "(" + valueToStringImpl(val.Elem()) + ")"
+
+	// Functions are not inspected; a fixed marker stands in for them.
+	case reflect.Func:
+		return typ.String() + "(arg)"
+
+	default:
+		panic("valueToString: can't print type " + typ.String())
+	}
+}
diff --git a/src/internal/reflectlite/type.go b/src/internal/reflectlite/type.go
new file mode 100644
index 0000000..b1899b0
--- /dev/null
+++ b/src/internal/reflectlite/type.go
@@ -0,0 +1,970 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package reflectlite implements a lightweight version of reflect, not using
+// any package except for "runtime", "unsafe", and "internal/unsafeheader".
+package reflectlite
+
+import (
+	"internal/unsafeheader"
+	"unsafe"
+)
+
+// Type is the representation of a Go type.
+//
+// Not all methods apply to all kinds of types. Restrictions,
+// if any, are noted in the documentation for each method.
+// Use the Kind method to find out the kind of type before
+// calling kind-specific methods. Calling a method
+// inappropriate to the kind of type causes a run-time panic.
+//
+// Type values are comparable, such as with the == operator,
+// so they can be used as map keys.
+// Two Type values are equal if they represent identical types.
+type Type interface {
+	// Methods applicable to all types.
+
+	// Name returns the type's name within its package for a defined type.
+	// For other (non-defined) types it returns the empty string.
+	Name() string
+
+	// PkgPath returns a defined type's package path, that is, the import path
+	// that uniquely identifies the package, such as "encoding/base64".
+	// If the type was predeclared (string, error) or not defined (*T, struct{},
+	// []int, or A where A is an alias for a non-defined type), the package path
+	// will be the empty string.
+	PkgPath() string
+
+	// Size returns the number of bytes needed to store
+	// a value of the given type; it is analogous to unsafe.Sizeof.
+	Size() uintptr
+
+	// Kind returns the specific kind of this type.
+	Kind() Kind
+
+	// Implements reports whether the type implements the interface type u.
+	Implements(u Type) bool
+
+	// AssignableTo reports whether a value of the type is assignable to type u.
+	AssignableTo(u Type) bool
+
+	// Comparable reports whether values of this type are comparable.
+	Comparable() bool
+
+	// String returns a string representation of the type.
+	// The string representation may use shortened package names
+	// (e.g., base64 instead of "encoding/base64") and is not
+	// guaranteed to be unique among types. To test for type identity,
+	// compare the Types directly.
+	String() string
+
+	// Elem returns a type's element type.
+	// It panics if the type's Kind is not Array, Chan, Map, Ptr, or Slice.
+	Elem() Type
+
+	common() *rtype
+	uncommon() *uncommonType
+}
+
+/*
+ * These data structures are known to the compiler (../../cmd/compile/internal/reflectdata/reflect.go).
+ * A few are known to ../../runtime/type.go to convey to debuggers.
+ * They are also known to ../../runtime/type.go.
+ */
+
+// A Kind represents the specific kind of type that a Type represents.
+// The zero Kind (Invalid) is not a valid kind.
+type Kind uint
+
+const (
+	Invalid Kind = iota // zero value; Kind.String reports it as "invalid"
+	Bool
+	Int
+	Int8
+	Int16
+	Int32
+	Int64
+	Uint
+	Uint8
+	Uint16
+	Uint32
+	Uint64
+	Uintptr
+	Float32
+	Float64
+	Complex64
+	Complex128
+	Array
+	Chan
+	Func
+	Interface
+	Map
+	Ptr
+	Slice
+	String
+	Struct
+	UnsafePointer
+)
+
+// tflag is used by an rtype to signal what extra type information is
+// available in the memory directly following the rtype value.
+//
+// tflag values must be kept in sync with copies in:
+//	cmd/compile/internal/reflectdata/reflect.go
+//	cmd/link/internal/ld/decodesym.go
+//	runtime/type.go
+type tflag uint8
+
+const (
+	// tflagUncommon means that an uncommonType value is stored
+	// just beyond the outer type structure.
+	//
+	// For example, if t.Kind() == Struct and t.tflag&tflagUncommon != 0,
+	// then t has uncommonType data and it can be accessed as:
+	//
+	//	type tUncommon struct {
+	//		structType
+	//		u uncommonType
+	//	}
+	//	u := &(*tUncommon)(unsafe.Pointer(t)).u
+	tflagUncommon tflag = 1 << 0
+
+	// tflagExtraStar means the name in the str field has an
+	// extraneous '*' prefix. This is because for most types T in
+	// a program, the type *T also exists and reusing the str data
+	// saves binary size. (*rtype).String strips the prefix again.
+	tflagExtraStar tflag = 1 << 1
+
+	// tflagNamed means the type has a name.
+	tflagNamed tflag = 1 << 2
+
+	// tflagRegularMemory means that equal and hash functions can treat
+	// this type as a single region of t.size bytes.
+	tflagRegularMemory tflag = 1 << 3
+)
+
+// rtype is the common implementation of most types; *rtype implements Type.
+// It is embedded in other struct types.
+//
+// rtype must be kept in sync with ../../runtime/type.go:/^type._type.
+type rtype struct {
+	size       uintptr // size in bytes, as returned by Size
+	ptrdata    uintptr // number of bytes in the type that can contain pointers
+	hash       uint32  // hash of type; avoids computation in hash tables
+	tflag      tflag   // extra type information flags
+	align      uint8   // alignment of variable with this type
+	fieldAlign uint8   // alignment of struct field with this type
+	kind       uint8   // Kind in the low kindMask bits, plus kind* flag bits
+	// function for comparing objects of this type
+	// (ptr to object A, ptr to object B) -> ==?
+	equal     func(unsafe.Pointer, unsafe.Pointer) bool
+	gcdata    *byte   // garbage collection data
+	str       nameOff // string form
+	ptrToThis typeOff // type for pointer to this type, may be zero
+}
+
+// method describes a method on a non-interface type.
+type method struct {
+	name nameOff // name of method
+	mtyp typeOff // method type (without receiver)
+	ifn  textOff // fn used in interface call (one-word receiver)
+	tfn  textOff // fn used for normal method call
+}
+
+// uncommonType is present only for defined types or types with methods
+// (if T is a defined type, the uncommonTypes for T and *T have methods).
+// Keeping this data in a separate, optional struct reduces the overall
+// size required to describe a non-defined type with no methods.
+type uncommonType struct {
+	pkgPath nameOff // import path; empty for built-in types like int, string
+	mcount  uint16  // number of methods
+	xcount  uint16  // number of exported methods
+	moff    uint32  // offset from this uncommonType to [mcount]method
+	_       uint32  // unused
+}
+
+// chanDir represents a channel type's direction as a set of direction bits.
+type chanDir int
+
+const (
+	recvDir chanDir             = 1 << iota // <-chan (receive only)
+	sendDir                                 // chan<- (send only)
+	bothDir = recvDir | sendDir             // chan (both bits set)
+)
+
+// arrayType represents a fixed array type.
+type arrayType struct {
+	rtype
+	elem  *rtype  // array element type
+	slice *rtype  // slice type
+	len   uintptr // array length, as returned by Len
+}
+
+// chanType represents a channel type: an rtype plus element type and direction.
+type chanType struct {
+	rtype
+	elem *rtype  // channel element type
+	dir  uintptr // channel direction (chanDir)
+}
+
+// funcType represents a function type.
+//
+// A *rtype for each in and out parameter is stored in an array that
+// directly follows the funcType (and possibly its uncommonType). So
+// a function type with one method, one input, and one output is:
+//
+//	struct {
+//		funcType
+//		uncommonType
+//		[2]*rtype    // [0] is in, [1] is out
+//	}
+type funcType struct {
+	rtype
+	inCount  uint16 // number of input parameters
+	outCount uint16 // top bit is set if last input parameter is ...
+}
+
+// imethod represents a method on an interface type.
+type imethod struct {
+	name nameOff // name of method
+	typ  typeOff // .(*funcType) underneath
+}
+
+// interfaceType represents an interface type; NumMethod reports len(methods).
+type interfaceType struct {
+	rtype
+	pkgPath name      // import path
+	methods []imethod // sorted by hash
+}
+
+// mapType represents a map type: an rtype followed by map-specific data.
+type mapType struct {
+	rtype
+	key    *rtype // map key type
+	elem   *rtype // map element (value) type
+	bucket *rtype // internal bucket structure
+	// function for hashing keys (ptr to key, seed) -> hash
+	hasher     func(unsafe.Pointer, uintptr) uintptr
+	keysize    uint8  // size of key slot
+	valuesize  uint8  // size of value slot
+	bucketsize uint16 // size of bucket
+	flags      uint32
+}
+
+// ptrType represents a pointer type: an rtype plus the pointed-at type.
+type ptrType struct {
+	rtype
+	elem *rtype // pointer element (pointed at) type
+}
+
+// sliceType represents a slice type: an rtype plus the element type.
+type sliceType struct {
+	rtype
+	elem *rtype // slice element type
+}
+
+// structField describes a single field of a struct type.
+type structField struct {
+	name        name    // name is always non-empty
+	typ         *rtype  // type of field
+	offsetEmbed uintptr // byte offset of field<<1 | isEmbedded
+}
+
+func (f *structField) offset() uintptr {
+	return f.offsetEmbed >> 1 // shift out the isEmbedded bit
+}
+
+func (f *structField) embedded() bool {
+	return f.offsetEmbed&1 != 0 // the low bit is the isEmbedded flag
+}
+
+// structType represents a struct type: an rtype plus package path and fields.
+type structType struct {
+	rtype
+	pkgPath name
+	fields  []structField // sorted by offset
+}
+
+// name is an encoded type name with optional extra data.
+//
+// The first byte is a bit field containing:
+//
+//	1<<0 the name is exported
+//	1<<1 tag data follows the name
+//	1<<2 pkgPath nameOff follows the name and tag
+//
+// Following that, there is a varint-encoded length of the name,
+// followed by the name itself. The varint uses the encoding/binary
+// format: seven value bits per byte, least-significant group first,
+// with the top bit set on every byte except the last.
+//
+// If tag data is present, it follows the name and likewise starts
+// with a varint-encoded length, which is followed by the tag
+// itself.
+//
+// If the import path follows, then 4 bytes at the end of
+// the data form a nameOff. The import path is only set for concrete
+// methods that are defined in a different package than their type.
+//
+// If a name starts with "*", then the exported bit represents
+// whether the pointed to type is exported.
+type name struct {
+	bytes *byte
+}
+
+// data returns a pointer to the byte off bytes past the start of n.
+// whySafe is passed through to add.
+func (n name) data(off int, whySafe string) *byte { return (*byte)(add(unsafe.Pointer(n.bytes), uintptr(off), whySafe)) }
+
+// isExported reports whether the exported bit (1<<0) of the flag byte is
+// set. It dereferences n.bytes, which must not be nil.
+func (n name) isExported() bool { return (*n.bytes)&(1<<0) != 0 }
+
+// hasTag reports whether the tag bit (1<<1) of the flag byte is set.
+// It dereferences n.bytes, which must not be nil.
+func (n name) hasTag() bool { return (*n.bytes)&(1<<1) != 0 }
+
+// readVarint decodes the encoding/binary-style varint that starts at byte
+// offset off. It returns how many bytes were consumed, then the value.
+func (n name) readVarint(off int) (int, int) {
+	var value int
+	for shift, pos := 0, off; ; shift, pos = shift+7, pos+1 {
+		b := *n.data(pos, "read varint")
+		value += int(b&0x7f) << shift
+		if b&0x80 == 0 {
+			return pos - off + 1, value
+		}
+	}
+}
+
+// name returns the name text as a string aliasing the encoded bytes.
+func (n name) name() (s string) {
+	if n.bytes != nil {
+		lenBytes, strLen := n.readVarint(1)
+		hdr := (*unsafeheader.String)(unsafe.Pointer(&s))
+		hdr.Data = unsafe.Pointer(n.data(1+lenBytes, "non-empty string"))
+		hdr.Len = strLen
+	}
+	return
+}
+
+// tag returns the tag text stored after the name, or "" when absent.
+func (n name) tag() (s string) {
+	if !n.hasTag() {
+		return ""
+	}
+	nameLenBytes, nameLen := n.readVarint(1)
+	tagLenBytes, tagLen := n.readVarint(1 + nameLenBytes + nameLen)
+	hdr := (*unsafeheader.String)(unsafe.Pointer(&s))
+	hdr.Data, hdr.Len = unsafe.Pointer(n.data(1+nameLenBytes+nameLen+tagLenBytes, "non-empty string")), tagLen
+	return
+}
+
+// pkgPath returns the import path recorded after the name and tag, or ""
+// when the name is nil or its pkgPath flag bit (1<<2) is clear.
+func (n name) pkgPath() string {
+	if n.bytes == nil || *n.data(0, "name flag field")&(1<<2) == 0 {
+		return ""
+	}
+	lenBytes, strLen := n.readVarint(1)
+	pos := 1 + lenBytes + strLen
+	if n.hasTag() {
+		tagLenBytes, tagLen := n.readVarint(pos)
+		pos += tagLenBytes + tagLen
+	}
+	// The offset may be unaligned in memory, so it is copied out bytewise.
+	var pathOff int32
+	copy((*[4]byte)(unsafe.Pointer(&pathOff))[:], (*[4]byte)(unsafe.Pointer(n.data(pos, "name offset field")))[:])
+	return name{(*byte)(resolveTypeOff(unsafe.Pointer(n.bytes), pathOff))}.name()
+}
+
+/*
+ * The compiler knows the exact layout of all the data structures above.
+ * The compiler does not know about the data structures and methods below.
+ */
+
+const (
+	kindDirectIface = 1 << 5       // set when the value is stored directly in an interface (see ifaceIndir)
+	kindGCProg      = 1 << 6       // Type.gc points to GC program
+	kindMask        = (1 << 5) - 1 // low bits of rtype.kind that hold the Kind
+)
+
+// String returns the name of k, or the name of Invalid if k is out of range.
+func (k Kind) String() string {
+	if k < Kind(len(kindNames)) { // unsigned compare; int(k) could wrap negative
+		return kindNames[k]
+	}
+	return kindNames[0]
+}
+
+var kindNames = []string{ // indexed by Kind; consulted by Kind.String
+	Invalid:       "invalid",
+	Bool:          "bool",
+	Int:           "int",
+	Int8:          "int8",
+	Int16:         "int16",
+	Int32:         "int32",
+	Int64:         "int64",
+	Uint:          "uint",
+	Uint8:         "uint8",
+	Uint16:        "uint16",
+	Uint32:        "uint32",
+	Uint64:        "uint64",
+	Uintptr:       "uintptr",
+	Float32:       "float32",
+	Float64:       "float64",
+	Complex64:     "complex64",
+	Complex128:    "complex128",
+	Array:         "array",
+	Chan:          "chan",
+	Func:          "func",
+	Interface:     "interface",
+	Map:           "map",
+	Ptr:           "ptr",
+	Slice:         "slice",
+	String:        "string",
+	Struct:        "struct",
+	UnsafePointer: "unsafe.Pointer",
+}
+
+func (t *uncommonType) methods() []method {
+	if n := t.mcount; n != 0 { // the method table starts moff bytes past t
+		return (*[1 << 16]method)(add(unsafe.Pointer(t), uintptr(t.moff), "t.mcount > 0"))[:n:n]
+	}
+	return nil
+}
+
+func (t *uncommonType) exportedMethods() []method {
+	if n := t.xcount; n != 0 { // the first xcount table entries are returned
+		return (*[1 << 16]method)(add(unsafe.Pointer(t), uintptr(t.moff), "t.xcount > 0"))[:n:n]
+	}
+	return nil
+}
+
+// resolveNameOff resolves a name offset from a base pointer.
+// The (*rtype).nameOff method is a convenience wrapper for this function.
+// It has no body here; it is implemented in the runtime package.
+func resolveNameOff(ptrInModule unsafe.Pointer, off int32) unsafe.Pointer
+
+// resolveTypeOff resolves an *rtype offset from a base type.
+// The (*rtype).typeOff method is a convenience wrapper for this function;
+// name.pkgPath also calls it directly. Implemented in the runtime package.
+func resolveTypeOff(rtype unsafe.Pointer, off int32) unsafe.Pointer
+
+type nameOff int32 // offset to a name
+type typeOff int32 // offset to an *rtype
+type textOff int32 // offset from top of text section
+
+// nameOff resolves the name offset off relative to t by calling
+// resolveNameOff.
+func (t *rtype) nameOff(off nameOff) name { return name{(*byte)(resolveNameOff(unsafe.Pointer(t), int32(off)))} }
+
+// typeOff resolves the type offset off relative to t by calling
+// resolveTypeOff.
+func (t *rtype) typeOff(off typeOff) *rtype { return (*rtype)(resolveTypeOff(unsafe.Pointer(t), int32(off))) }
+
+func (t *rtype) uncommon() *uncommonType {
+	if t.tflag&tflagUncommon == 0 { // no uncommonType data follows t
+		return nil
+	}
+	switch t.Kind() { // the kind-specific type struct decides where the data starts
+	case Struct:
+		return &(*structTypeUncommon)(unsafe.Pointer(t)).u
+	case Ptr:
+		type u struct {
+			ptrType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	case Func:
+		type u struct {
+			funcType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	case Slice:
+		type u struct {
+			sliceType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	case Array:
+		type u struct {
+			arrayType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	case Chan:
+		type u struct {
+			chanType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	case Map:
+		type u struct {
+			mapType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	case Interface:
+		type u struct {
+			interfaceType
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	default: // every remaining kind is laid out as a bare rtype
+		type u struct {
+			rtype
+			u uncommonType
+		}
+		return &(*u)(unsafe.Pointer(t)).u
+	}
+}
+
+func (t *rtype) String() string {
+	str := t.nameOff(t.str).name()
+	if t.tflag&tflagExtraStar == 0 {
+		return str
+	}
+	return str[1:] // skip the extraneous '*' prefix
+}
+
+func (t *rtype) Size() uintptr { return t.size } // Size reports the size field
+
+func (t *rtype) Kind() Kind { return Kind(t.kind & kindMask) } // flag bits above kindMask are dropped
+
+func (t *rtype) pointers() bool { return t.ptrdata != 0 } // true when ptrdata is non-zero
+
+func (t *rtype) common() *rtype { return t } // t itself; satisfies Type.common
+
+// exportedMethods returns t's exported methods, or nil without uncommon data.
+func (t *rtype) exportedMethods() []method {
+	if extra := t.uncommon(); extra != nil {
+		return extra.exportedMethods()
+	}
+	return nil
+}
+
+// NumMethod counts interface methods for interfaces, else exported methods.
+func (t *rtype) NumMethod() int {
+	if t.Kind() != Interface {
+		return len(t.exportedMethods())
+	}
+	return (*interfaceType)(unsafe.Pointer(t)).NumMethod()
+}
+
+// PkgPath returns the import path of a named type that has uncommon data,
+// and "" for every other type.
+func (t *rtype) PkgPath() string {
+	if t.tflag&tflagNamed != 0 {
+		if extra := t.uncommon(); extra != nil {
+			return t.nameOff(extra.pkgPath).name()
+		}
+	}
+	return ""
+}
+
+// hasName reports whether t is flagged as having a name (tflagNamed is
+// set in t.tflag).
+func (t *rtype) hasName() bool { return t.tflag&tflagNamed != 0 }
+
+// Name returns what follows the last '.' in t.String(), or "" if unnamed.
+func (t *rtype) Name() string {
+	if !t.hasName() {
+		return ""
+	}
+	full := t.String()
+	dot := len(full) - 1
+	for ; dot >= 0 && full[dot] != '.'; dot-- {
+	}
+	return full[dot+1:]
+}
+
+// chanDir returns the direction of a channel type; it panics for other kinds.
+func (t *rtype) chanDir() chanDir {
+	if t.Kind() != Chan {
+		panic("reflect: chanDir of non-chan type")
+	}
+	return chanDir((*chanType)(unsafe.Pointer(t)).dir)
+}
+
+// Elem returns the element type of an Array, Chan, Map, Ptr or Slice
+// type, converted with toType.
+// It panics for every other kind.
+func (t *rtype) Elem() Type {
+	p := unsafe.Pointer(t)
+	// Each of these kinds stores its element type in an elem field.
+	switch t.Kind() {
+	case Array:
+		return toType((*arrayType)(p).elem)
+	case Chan:
+		return toType((*chanType)(p).elem)
+	case Map:
+		return toType((*mapType)(p).elem)
+	case Ptr:
+		return toType((*ptrType)(p).elem)
+	case Slice:
+		return toType((*sliceType)(p).elem)
+	}
+	panic("reflect: Elem of invalid type")
+}
+
+// In returns the i'th input parameter type of a func type.
+func (t *rtype) In(i int) Type {
+	if t.Kind() != Func {
+		panic("reflect: In of non-func type")
+	}
+	return toType((*funcType)(unsafe.Pointer(t)).in()[i])
+}
+
+// Key returns the key type of a map type; it panics for other kinds.
+func (t *rtype) Key() Type {
+	if t.Kind() != Map {
+		panic("reflect: Key of non-map type")
+	}
+	return toType((*mapType)(unsafe.Pointer(t)).key)
+}
+
+// Len returns the length of an array type; it panics for other kinds.
+func (t *rtype) Len() int {
+	if t.Kind() != Array {
+		panic("reflect: Len of non-array type")
+	}
+	return int((*arrayType)(unsafe.Pointer(t)).len)
+}
+
+// NumField returns a struct type's field count; it panics for other kinds.
+func (t *rtype) NumField() int {
+	if t.Kind() != Struct {
+		panic("reflect: NumField of non-struct type")
+	}
+	return len((*structType)(unsafe.Pointer(t)).fields)
+}
+
+// NumIn returns a func type's input parameter count; it panics otherwise.
+func (t *rtype) NumIn() int {
+	if t.Kind() != Func {
+		panic("reflect: NumIn of non-func type")
+	}
+	return int((*funcType)(unsafe.Pointer(t)).inCount)
+}
+
+// NumOut returns a func type's output parameter count; it panics otherwise.
+func (t *rtype) NumOut() int {
+	if t.Kind() != Func {
+		panic("reflect: NumOut of non-func type")
+	}
+	return len((*funcType)(unsafe.Pointer(t)).out())
+}
+
+// Out returns the i'th output parameter type of a func type.
+func (t *rtype) Out(i int) Type {
+	if t.Kind() != Func {
+		panic("reflect: Out of non-func type")
+	}
+	return toType((*funcType)(unsafe.Pointer(t)).out()[i])
+}
+
+func (t *funcType) in() []*rtype {
+	if t.inCount == 0 {
+		return nil
+	}
+	skip := unsafe.Sizeof(*t) // the parameter array starts after t ...
+	if t.tflag&tflagUncommon != 0 {
+		skip += unsafe.Sizeof(uncommonType{}) // ... and after its uncommonType, if any
+	}
+	return (*[1 << 20]*rtype)(add(unsafe.Pointer(t), skip, "t.inCount > 0"))[:t.inCount:t.inCount]
+}
+
+func (t *funcType) out() []*rtype {
+	n := t.outCount & (1<<15 - 1) // mask off the top (variadic) bit
+	if n == 0 {
+		return nil
+	}
+	skip := unsafe.Sizeof(*t)
+	if t.tflag&tflagUncommon != 0 {
+		skip += unsafe.Sizeof(uncommonType{})
+	}
+	return (*[1 << 20]*rtype)(add(unsafe.Pointer(t), skip, "outCount > 0"))[t.inCount : t.inCount+n : t.inCount+n]
+}
+
+// add returns p+x.
+//
+// whySafe is deliberately unused, which lets the function inline as
+// cheaply as a plain p+x. Every call site should still pass a string
+// recording why the addition is safe, that is, why the addition does
+// not advance x to the very end of p's allocation and therefore
+// point incorrectly at the next block in memory. The string exists
+// purely as documentation for readers of the call site.
+//
+func add(p unsafe.Pointer, x uintptr, whySafe string) unsafe.Pointer { return unsafe.Pointer(uintptr(p) + x) }
+
+// NumMethod returns the number of interface methods in the type's method set, len(t.methods).
+func (t *interfaceType) NumMethod() int { return len(t.methods) }
+
+// TypeOf returns the reflection Type that represents the dynamic type of i.
+// If i is a nil interface value, TypeOf returns nil. The type word is read
+// straight out of i's emptyInterface representation.
+func TypeOf(i interface{}) Type {
+	return toType((*emptyInterface)(unsafe.Pointer(&i)).typ)
+}
+
+func (t *rtype) Implements(u Type) bool {
+	switch {
+	case u == nil:
+		panic("reflect: nil type passed to Type.Implements")
+	case u.Kind() != Interface:
+		panic("reflect: non-interface type passed to Type.Implements")
+	}
+	return implements(u.(*rtype), t)
+}
+
+func (t *rtype) AssignableTo(u Type) bool {
+	if u != nil {
+		dst := u.(*rtype)
+		return directlyAssignable(dst, t) || implements(dst, t)
+	}
+	panic("reflect: nil type passed to Type.AssignableTo")
+}
+
+// Comparable reports whether values of type t are comparable, which is the
+// case when t carries an equal function.
+func (t *rtype) Comparable() bool { return t.equal != nil }
+
+// implements reports whether the type V implements the interface type T.
+func implements(T, V *rtype) bool {
+	if T.Kind() != Interface {
+		return false
+	}
+	t := (*interfaceType)(unsafe.Pointer(T))
+	if len(t.methods) == 0 { // an interface without methods is implemented by every type
+		return true
+	}
+
+	// The same algorithm applies in both cases, but the
+	// method tables for an interface type and a concrete type
+	// are different, so the code is duplicated.
+	// In both cases the algorithm is a linear scan over the two
+	// lists - T's methods and V's methods - simultaneously.
+	// Since method tables are stored in a unique sorted order
+	// (alphabetical, with no duplicate method names), the scan
+	// through V's methods must hit a match for each of T's
+	// methods along the way, or else V does not implement T.
+	// This lets us run the scan in overall linear time instead of
+	// the quadratic time a naive search would require.
+	// See also ../../runtime/iface.go.
+	if V.Kind() == Interface {
+		v := (*interfaceType)(unsafe.Pointer(V))
+		i := 0
+		for j := 0; j < len(v.methods); j++ {
+			tm := &t.methods[i]
+			tmName := t.nameOff(tm.name)
+			vm := &v.methods[j]
+			vmName := V.nameOff(vm.name)
+			if vmName.name() == tmName.name() && V.typeOff(vm.typ) == t.typeOff(tm.typ) {
+				if !tmName.isExported() {
+					tmPkgPath := tmName.pkgPath()
+					if tmPkgPath == "" {
+						tmPkgPath = t.pkgPath.name()
+					}
+					vmPkgPath := vmName.pkgPath()
+					if vmPkgPath == "" {
+						vmPkgPath = v.pkgPath.name()
+					}
+					if tmPkgPath != vmPkgPath { // same unexported name, different package
+						continue
+					}
+				}
+				if i++; i >= len(t.methods) { // every method of T has been matched
+					return true
+				}
+			}
+		}
+		return false
+	}
+
+	v := V.uncommon()
+	if v == nil { // V has no uncommonType and hence no method table
+		return false
+	}
+	i := 0
+	vmethods := v.methods()
+	for j := 0; j < int(v.mcount); j++ {
+		tm := &t.methods[i]
+		tmName := t.nameOff(tm.name)
+		vm := vmethods[j]
+		vmName := V.nameOff(vm.name)
+		if vmName.name() == tmName.name() && V.typeOff(vm.mtyp) == t.typeOff(tm.typ) {
+			if !tmName.isExported() {
+				tmPkgPath := tmName.pkgPath()
+				if tmPkgPath == "" {
+					tmPkgPath = t.pkgPath.name()
+				}
+				vmPkgPath := vmName.pkgPath()
+				if vmPkgPath == "" {
+					vmPkgPath = V.nameOff(v.pkgPath).name()
+				}
+				if tmPkgPath != vmPkgPath { // same unexported name, different package
+					continue
+				}
+			}
+			if i++; i >= len(t.methods) { // every method of T has been matched
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// directlyAssignable reports whether a value x of type V can be directly
+// assigned (using memmove) to a value of type T.
+// https://golang.org/doc/go_spec.html#Assignability
+// The interface rules (implemented elsewhere) and the ideal constant
+// rules (no ideal constants at run time) are ignored here.
+func directlyAssignable(T, V *rtype) bool {
+	switch {
+	case T == V:
+		// x's type V is identical to T.
+		return true
+	case T.hasName() && V.hasName():
+		// At least one of T and V must not be a defined type ...
+		return false
+	case T.Kind() != V.Kind():
+		// ... and both must have the same kind.
+		return false
+	}
+	// What remains is that T and V have identical underlying types.
+	return haveIdenticalUnderlyingType(T, V, true)
+}
+
+// haveIdenticalType reports whether T and V are identical; tags count iff cmpTags.
+func haveIdenticalType(T, V Type, cmpTags bool) bool {
+	if cmpTags {
+		return T == V
+	}
+	// Same name and kind are not enough: package a's T is not package b's T.
+	if T.Name() != V.Name() || T.Kind() != V.Kind() || T.PkgPath() != V.PkgPath() {
+		return false
+	}
+	return haveIdenticalUnderlyingType(T.common(), V.common(), false)
+}
+
+func haveIdenticalUnderlyingType(T, V *rtype, cmpTags bool) bool {
+	if T == V { // the very same type descriptor
+		return true
+	}
+
+	kind := T.Kind()
+	if kind != V.Kind() {
+		return false
+	}
+
+	// Non-composite types of equal kind have same underlying type
+	// (the predefined instance of the type).
+	if Bool <= kind && kind <= Complex128 || kind == String || kind == UnsafePointer {
+		return true
+	}
+
+	// Composite types.
+	switch kind {
+	case Array:
+		return T.Len() == V.Len() && haveIdenticalType(T.Elem(), V.Elem(), cmpTags)
+
+	case Chan:
+		// Special case:
+		// x is a bidirectional channel value, T is a channel type,
+		// and x's type V and T have identical element types.
+		if V.chanDir() == bothDir && haveIdenticalType(T.Elem(), V.Elem(), cmpTags) {
+			return true
+		}
+
+		// Otherwise continue test for identical underlying type.
+		return V.chanDir() == T.chanDir() && haveIdenticalType(T.Elem(), V.Elem(), cmpTags)
+
+	case Func:
+		t := (*funcType)(unsafe.Pointer(T))
+		v := (*funcType)(unsafe.Pointer(V))
+		if t.outCount != v.outCount || t.inCount != v.inCount { // outCount also carries the variadic bit
+			return false
+		}
+		for i := 0; i < t.NumIn(); i++ {
+			if !haveIdenticalType(t.In(i), v.In(i), cmpTags) {
+				return false
+			}
+		}
+		for i := 0; i < t.NumOut(); i++ {
+			if !haveIdenticalType(t.Out(i), v.Out(i), cmpTags) {
+				return false
+			}
+		}
+		return true
+
+	case Interface:
+		t := (*interfaceType)(unsafe.Pointer(T))
+		v := (*interfaceType)(unsafe.Pointer(V))
+		if len(t.methods) == 0 && len(v.methods) == 0 {
+			return true
+		}
+		// Might have the same methods but still
+		// need a run time conversion.
+		return false
+
+	case Map:
+		return haveIdenticalType(T.Key(), V.Key(), cmpTags) && haveIdenticalType(T.Elem(), V.Elem(), cmpTags)
+
+	case Ptr, Slice:
+		return haveIdenticalType(T.Elem(), V.Elem(), cmpTags)
+
+	case Struct:
+		t := (*structType)(unsafe.Pointer(T))
+		v := (*structType)(unsafe.Pointer(V))
+		if len(t.fields) != len(v.fields) {
+			return false
+		}
+		if t.pkgPath.name() != v.pkgPath.name() {
+			return false
+		}
+		for i := range t.fields {
+			tf := &t.fields[i]
+			vf := &v.fields[i]
+			if tf.name.name() != vf.name.name() {
+				return false
+			}
+			if !haveIdenticalType(tf.typ, vf.typ, cmpTags) {
+				return false
+			}
+			if cmpTags && tf.name.tag() != vf.name.tag() {
+				return false
+			}
+			if tf.offsetEmbed != vf.offsetEmbed { // compares byte offset and embedded flag together
+				return false
+			}
+		}
+		return true
+	}
+
+	return false
+}
+
+type structTypeUncommon struct { // overlay used by (*rtype).uncommon for Struct kinds
+	structType
+	u uncommonType
+}
+
+// toType converts a *rtype into a Type suitable for returning to
+// clients of the package. Under gc its only job is to turn a nil *rtype
+// into a nil Type (rather than a non-nil interface holding a nil
+// pointer); under gccgo this function also ensures that multiple
+// *rtype for the same type are coalesced into a single Type.
+func toType(t *rtype) Type {
+	if t != nil {
+		return t
+	}
+	return nil
+}
+
+// ifaceIndir reports whether t is stored indirectly in an interface value,
+// which is the case exactly when the kindDirectIface bit of t.kind is
+// clear.
+func ifaceIndir(t *rtype) bool { return t.kind&kindDirectIface == 0 }
diff --git a/src/internal/reflectlite/value.go b/src/internal/reflectlite/value.go
new file mode 100644
index 0000000..0365eee
--- /dev/null
+++ b/src/internal/reflectlite/value.go
@@ -0,0 +1,477 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reflectlite
+
+import (
+	"internal/unsafeheader"
+	"runtime"
+	"unsafe"
+)
+
+const ptrSize = 4 << (^uintptr(0) >> 63) // pointer size in bytes (4 or 8): unsafe.Sizeof(uintptr(0)) but an ideal const
+
+// Value is the reflection interface to a Go value.
+//
+// Not all methods apply to all kinds of values. Restrictions,
+// if any, are noted in the documentation for each method.
+// Use the Kind method to find out the kind of value before
+// calling kind-specific methods. Calling a method
+// inappropriate to the kind of type causes a run time panic.
+//
+// The zero Value represents no value.
+// Its IsValid method returns false, its Kind method returns Invalid,
+// its String method returns "<invalid Value>", and all other methods panic.
+// Most functions and methods never return an invalid value.
+// If one does, its documentation states the conditions explicitly.
+//
+// A Value can be used concurrently by multiple goroutines provided that
+// the underlying Go value can be used concurrently for the equivalent
+// direct operations.
+//
+// To compare two Values, compare the results of the Interface method.
+// Using == on two Values does not compare the underlying values
+// they represent.
+type Value struct {
+	// typ holds the type of the value represented by a Value.
+	typ *rtype
+
+	// Pointer-valued data or, if flagIndir is set, pointer to data.
+	// Valid when either flagIndir is set or typ.pointers() is true.
+	ptr unsafe.Pointer
+
+	// flag holds metadata about the value.
+	// The lowest five bits (flagKindMask) give the Kind of the value, repeating typ.Kind().
+	// The next bits are flag bits:
+	//	- flagStickyRO: obtained via unexported not embedded field, so read-only
+	//	- flagEmbedRO: obtained via unexported embedded field, so read-only
+	//	- flagIndir: ptr holds a pointer to the data
+	//	- flagAddr: v.CanAddr is true (implies flagIndir)
+	//	- flagMethod: marks a method value
+	// Value cannot represent method values.
+	// The remaining 22+ bits (from flagMethodShift up) give a method number for method values.
+	// If flag.kind() != Func, code can assume that flagMethod is unset.
+	// If ifaceIndir(typ), code can assume that flagIndir is set.
+	flag
+
+	// A method value represents a curried method invocation
+	// like r.Read for some receiver r. The typ+ptr+flag bits describe
+	// the receiver r, but the flag's Kind bits say Func (methods are
+	// functions), and the top bits of the flag give the method number
+	// in r's type's method table.
+}
+
+type flag uintptr // metadata word embedded in Value: Kind bits plus the flag* bits declared below
+
+const (
+	flagKindWidth        = 5                          // width in bits of the Kind field; there are 27 kinds
+	flagKindMask    flag = 1<<flagKindWidth - 1       // mask selecting the Kind bits
+	flagStickyRO    flag = 1 << 5                     // read-only: obtained via unexported not embedded field
+	flagEmbedRO     flag = 1 << 6                     // read-only: obtained via unexported embedded field
+	flagIndir       flag = 1 << 7                     // ptr holds a pointer to the data
+	flagAddr        flag = 1 << 8                     // v.CanAddr is true (implies flagIndir)
+	flagMethod      flag = 1 << 9                     // method value marker; Value cannot represent method values
+	flagMethodShift      = 10                         // position of the first bit above flagMethod
+	flagRO          flag = flagStickyRO | flagEmbedRO // either read-only bit
+)
+
+// kind extracts the Kind held in the bits of f selected by flagKindMask.
+func (f flag) kind() Kind {
+	kindBits := f & flagKindMask
+	return Kind(kindBits)
+}
+
+// ro collapses the read-only state of f into one bit: it returns
+// flagStickyRO when either flagStickyRO or flagEmbedRO is set in f,
+// and 0 otherwise.
+func (f flag) ro() flag {
+	if f&flagRO == 0 {
+		return 0
+	}
+	return flagStickyRO
+}
+
+// pointer returns the underlying pointer represented by v.
+// v.Kind() must be Ptr, Map, Chan, Func, or UnsafePointer.
+// It panics unless v's type is pointer-sized and contains pointers.
+func (v Value) pointer() unsafe.Pointer {
+	pointerShaped := v.typ.size == ptrSize && v.typ.pointers()
+	if !pointerShaped {
+		panic("can't call pointer on a non-pointer Value")
+	}
+	p := v.ptr
+	if v.flag&flagIndir != 0 {
+		// v.ptr points at the pointer; load it.
+		p = *(*unsafe.Pointer)(p)
+	}
+	return p
+}
+
+// packEface converts v to the empty interface by filling in the interface's data word and type word by hand.
+func packEface(v Value) interface{} {
+	t := v.typ
+	var i interface{}
+	e := (*emptyInterface)(unsafe.Pointer(&i)) // e aliases i's header so its two words can be written directly
+	// First, fill in the data portion of the interface.
+	switch {
+	case ifaceIndir(t):
+		if v.flag&flagIndir == 0 {
+			panic("bad indir") // a type stored indirectly in interfaces must come with flagIndir set
+		}
+		// Value is indirect, and so is the interface we're making.
+		ptr := v.ptr
+		if v.flag&flagAddr != 0 {
+			// TODO: pass safe boolean from valueInterface so
+			// we don't need to copy if safe==true?
+			c := unsafe_New(t)
+			typedmemmove(t, c, ptr) // copy the addressable value into c so e.word points at the copy, not at v.ptr
+			ptr = c
+		}
+		e.word = ptr
+	case v.flag&flagIndir != 0:
+		// Value is indirect, but interface is direct. We need
+		// to load the data at v.ptr into the interface data word.
+		e.word = *(*unsafe.Pointer)(v.ptr)
+	default:
+		// Value is direct, and so is the interface.
+		e.word = v.ptr
+	}
+	// Now, fill in the type portion. We're very careful here not
+	// to have any operation between the e.word and e.typ assignments
+	// that would let the garbage collector observe the partially-built
+	// interface value.
+	e.typ = t
+	return i
+}
+
+// unpackEface converts the empty interface i to a Value. A nil type
+// word yields the zero Value; otherwise the flag carries the type's
+// Kind and, for types stored indirectly in interfaces, flagIndir.
+func unpackEface(i interface{}) Value {
+	hdr := (*emptyInterface)(unsafe.Pointer(&i))
+	// NOTE: don't read hdr.word until we know whether it is really a pointer or not.
+	typ := hdr.typ
+	if typ == nil {
+		return Value{}
+	}
+	fl := flag(typ.Kind())
+	if ifaceIndir(typ) {
+		fl |= flagIndir
+	}
+	return Value{typ: typ, ptr: hdr.word, flag: fl}
+}
+
+// A ValueError occurs when a Value method is invoked on
+// a Value that does not support it. Such cases are documented
+// in the description of each method. Its Error method formats both fields.
+type ValueError struct {
+	Method string // name of the method that was invoked
+	Kind   Kind   // Kind of the offending Value; 0 is reported as the zero Value
+}
+
+// Error implements the error interface. The message names the method
+// and either the Kind of the Value or, when Kind is 0, "zero Value".
+func (e *ValueError) Error() string {
+	switch e.Kind {
+	case 0:
+		return "reflect: call of " + e.Method + " on zero Value"
+	default:
+		return "reflect: call of " + e.Method + " on " + e.Kind.String() + " Value"
+	}
+}
+
+// methodName returns the name of the calling method,
+// assumed to be two stack frames above. When no function
+// information is available for that frame it returns
+// "unknown method".
+func methodName() string {
+	pc, _, _, _ := runtime.Caller(2)
+	if fn := runtime.FuncForPC(pc); fn != nil {
+		return fn.Name()
+	}
+	return "unknown method"
+}
+
+// emptyInterface is the header for an interface{} value; packEface and unpackEface overlay it on real interfaces.
+type emptyInterface struct {
+	typ  *rtype         // type word; nil for a nil interface
+	word unsafe.Pointer // data word
+}
+
+// mustBeExported panics if f records that the value was obtained using
+// an unexported field. A zero f panics with a *ValueError naming the
+// calling method instead.
+func (f flag) mustBeExported() {
+	switch {
+	case f == 0:
+		panic(&ValueError{methodName(), 0})
+	case f&flagRO != 0:
+		panic("reflect: " + methodName() + " using value obtained using unexported field")
+	}
+}
+
+// mustBeAssignable panics if f records that the value is not assignable,
+// which is to say that either it was obtained using an unexported field
+// or it is not addressable. A zero f panics with a *ValueError.
+func (f flag) mustBeAssignable() {
+	// Assignable means addressable and not read-only; the checks run in
+	// the order zero flag, read-only, unaddressable.
+	switch {
+	case f == 0:
+		panic(&ValueError{methodName(), Invalid})
+	case f&flagRO != 0:
+		panic("reflect: " + methodName() + " using value obtained using unexported field")
+	case f&flagAddr == 0:
+		panic("reflect: " + methodName() + " using unaddressable value")
+	}
+}
+
+// CanSet reports whether the value of v can be changed.
+// A Value can be changed only if it is addressable and was not
+// obtained by the use of unexported struct fields.
+// If CanSet returns false, calling Set or any type-specific
+// setter (e.g., SetBool, SetInt) will panic.
+func (v Value) CanSet() bool {
+	addressable := v.flag&flagAddr != 0
+	readOnly := v.flag&flagRO != 0
+	return addressable && !readOnly
+}
+
+// Elem returns the value that the interface v contains
+// or that the pointer v points to.
+// It panics if v's Kind is not Interface or Ptr.
+// It returns the zero Value if v is nil.
+func (v Value) Elem() Value {
+	switch v.kind() {
+	case Ptr:
+		target := v.ptr
+		if v.flag&flagIndir != 0 {
+			target = *(*unsafe.Pointer)(target)
+		}
+		// The returned value's address is v's value.
+		if target == nil {
+			return Value{}
+		}
+		elemType := (*ptrType)(unsafe.Pointer(v.typ)).elem
+		fl := v.flag&flagRO | flagIndir | flagAddr | flag(elemType.Kind())
+		return Value{elemType, target, fl}
+
+	case Interface:
+		// Empty interfaces and interfaces with methods are read through
+		// different views before being converted to interface{}.
+		var inner interface{}
+		if v.typ.NumMethod() != 0 {
+			inner = (interface{})(*(*interface {
+				M()
+			})(v.ptr))
+		} else {
+			inner = *(*interface{})(v.ptr)
+		}
+		elem := unpackEface(inner)
+		if elem.flag != 0 {
+			// Carry v's read-only state over to the element.
+			elem.flag |= v.flag.ro()
+		}
+		return elem
+	}
+	panic(&ValueError{"reflectlite.Value.Elem", v.kind()})
+}
+
+// valueInterface returns v's value as an interface{}.
+// It panics with a *ValueError if v is the zero Value.
+func valueInterface(v Value) interface{} {
+	if v.flag == 0 {
+		panic(&ValueError{"reflectlite.Value.Interface", 0})
+	}
+
+	if v.kind() != Interface {
+		// TODO: pass safe to packEface so we don't need to copy if safe==true?
+		return packEface(v)
+	}
+
+	// Special case: return the element inside the interface.
+	// Empty interface has one layout, all interfaces with
+	// methods have a second layout.
+	if v.numMethod() != 0 {
+		return *(*interface {
+			M()
+		})(v.ptr)
+	}
+	return *(*interface{})(v.ptr)
+}
+
+// IsNil reports whether its argument v is nil. The argument must be
+// a chan, func, interface, map, pointer, or slice value; if it is
+// not, IsNil panics. Note that IsNil is not always equivalent to a
+// regular comparison with nil in Go. For example, if v was created
+// by calling ValueOf with an uninitialized interface variable i,
+// i==nil will be true but v.IsNil will panic as v will be the zero
+// Value.
+func (v Value) IsNil() bool {
+	switch v.kind() {
+	case Interface, Slice:
+		// Both interface and slice are nil if first word is 0.
+		// Both are always bigger than a word; assume flagIndir.
+		return *(*unsafe.Pointer)(v.ptr) == nil
+	case Chan, Func, Map, Ptr, UnsafePointer:
+		// Method values are not supported here, so there is no
+		// flagMethod check.
+		p := v.ptr
+		if v.flag&flagIndir != 0 {
+			p = *(*unsafe.Pointer)(p)
+		}
+		return p == nil
+	}
+	panic(&ValueError{"reflectlite.Value.IsNil", v.kind()})
+}
+
+// IsValid reports whether v represents a value.
+// It returns false if v is the zero Value, that is, if v's flag is 0.
+// If IsValid returns false, all other methods except String panic.
+// Most functions and methods never return an invalid Value.
+// If one does, its documentation states the conditions explicitly.
+func (v Value) IsValid() bool {
+	return v.flag != 0
+}
+
+// Kind returns v's Kind, read from the kind bits of v's flag.
+// If v is the zero Value (IsValid returns false), Kind returns Invalid.
+func (v Value) Kind() Kind {
+	return v.kind()
+}
+
+// chanlen and maplen have no body here; they are implemented in runtime and used by Len:
+func chanlen(unsafe.Pointer) int
+func maplen(unsafe.Pointer) int
+
+// Len returns v's length.
+// It panics with a *ValueError if v's Kind is not Array, Chan, Map,
+// Slice, or String.
+//
+// Array lengths come from the array type, channel and map lengths from
+// chanlen and maplen, and slice and string lengths from their headers.
+func (v Value) Len() int {
+	k := v.kind()
+	switch k {
+	case Array:
+		tt := (*arrayType)(unsafe.Pointer(v.typ))
+		return int(tt.len)
+	case Chan:
+		return chanlen(v.pointer())
+	case Map:
+		return maplen(v.pointer())
+	case Slice:
+		// Slice is bigger than a word; assume flagIndir.
+		return (*unsafeheader.Slice)(v.ptr).Len
+	case String:
+		// String is bigger than a word; assume flagIndir.
+		return (*unsafeheader.String)(v.ptr).Len
+	}
+	// Name this package in the error, as Elem, IsNil and Type do.
+	panic(&ValueError{"reflectlite.Value.Len", v.kind()})
+}
+
+// numMethod returns the result of NumMethod on v's type.
+// It panics with a *ValueError if v has no type.
+func (v Value) numMethod() int {
+	if t := v.typ; t != nil {
+		return t.NumMethod()
+	}
+	panic(&ValueError{"reflectlite.Value.NumMethod", Invalid})
+}
+
+// Set assigns x to the value v.
+// It panics if CanSet returns false.
+// As in Go, x's value must be assignable to v's type.
+func (v Value) Set(x Value) {
+	v.mustBeAssignable()
+	x.mustBeExported() // do not let unexported x leak
+
+	// When v is an interface, its own storage serves as the scratch
+	// space for the conversion done by assignTo.
+	var scratch unsafe.Pointer
+	if v.kind() == Interface {
+		scratch = v.ptr
+	}
+	x = x.assignTo("reflectlite.Set", v.typ, scratch)
+
+	if x.flag&flagIndir == 0 {
+		// x holds the pointer itself; store it directly.
+		*(*unsafe.Pointer)(v.ptr) = x.ptr
+		return
+	}
+	typedmemmove(v.typ, v.ptr, x.ptr)
+}
+
+// Type returns v's type.
+// It panics with a *ValueError if v is the zero Value.
+func (v Value) Type() Type {
+	if v.flag == 0 {
+		panic(&ValueError{"reflectlite.Value.Type", Invalid})
+	}
+	// Method values not supported.
+	return v.typ
+}
+
+/*
+ * constructors
+ */
+
+// unsafe_New has no body here; it is implemented in package runtime. packEface and assignTo use the result as storage for a value of the given type.
+func unsafe_New(*rtype) unsafe.Pointer
+
+// ValueOf returns a new Value initialized to the concrete value
+// stored in the interface i. ValueOf(nil) returns the zero Value.
+func ValueOf(i interface{}) Value {
+	if i != nil {
+		// TODO: Maybe allow contents of a Value to live on the stack.
+		// For now we make the contents always escape to the heap. It
+		// makes life easier in a few places.
+		escapes(i)
+		return unpackEface(i)
+	}
+	return Value{}
+}
+
+// assignTo returns a value v that can be assigned directly to dst.
+// It panics, with a message starting with context, if v is not assignable to dst.
+// For a conversion to an interface type, target is a suggested scratch space to use.
+func (v Value) assignTo(context string, dst *rtype, target unsafe.Pointer) Value {
+	// if v.flag&flagMethod != 0 {
+	// 	v = makeMethodValue(context, v)
+	// }
+
+	switch {
+	case directlyAssignable(dst, v.typ):
+		// Overwrite type so that they match.
+		// Same memory layout, so no harm done.
+		fl := v.flag&(flagAddr|flagIndir) | v.flag.ro()
+		fl |= flag(dst.Kind())
+		return Value{dst, v.ptr, fl}
+
+	case implements(dst, v.typ):
+		if target == nil {
+			target = unsafe_New(dst) // no scratch space was supplied by the caller
+		}
+		if v.Kind() == Interface && v.IsNil() {
+			// A nil ReadWriter passed to nil Reader is OK,
+			// but using ifaceE2I below will panic.
+			// Avoid the panic by returning a nil dst (e.g., Reader) explicitly.
+			return Value{dst, nil, flag(Interface)}
+		}
+		x := valueInterface(v)
+		if dst.NumMethod() == 0 {
+			*(*interface{})(target) = x // dst is an empty interface: store x as is
+		} else {
+			ifaceE2I(dst, x, target)
+		}
+		return Value{dst, target, flagIndir | flag(Interface)}
+	}
+
+	// Failed: neither directly assignable nor an interface implemented by v's type.
+	panic(context + ": value of type " + v.typ.String() + " is not assignable to type " + dst.String())
+}
+
+// arrayAt returns the i-th element of p,
+// an array whose elements are eltSize bytes wide.
+// The array pointed at by p must have at least i+1 elements:
+// it is invalid (but impossible to check here) to pass i >= len,
+// because then the result will point outside the array.
+// whySafe must explain why i < len. (Passing "i < len" is fine;
+// the benefit is to surface this assumption at the call site.)
+func arrayAt(p unsafe.Pointer, i int, eltSize uintptr, whySafe string) unsafe.Pointer {
+	return add(p, uintptr(i)*eltSize, "i < len") // note: add receives the literal "i < len", not whySafe
+}
+
+func ifaceE2I(t *rtype, src interface{}, dst unsafe.Pointer) // declared without a body; NOTE(review): presumably provided by the runtime — confirm
+
+// typedmemmove copies a value of type t to dst from src. It is declared without a body in this file.
+//go:noescape
+func typedmemmove(t *rtype, dst, src unsafe.Pointer)
+
+// escapes is a dummy annotation marking that the value x escapes,
+// for use in cases where the reflect code is so clever that
+// the compiler cannot follow.
+func escapes(x interface{}) {
+	if dummy.b { // nothing in this file sets dummy.b
+		dummy.x = x
+	}
+}
+
+var dummy struct { // package-level sink used only by escapes
+	b bool
+	x interface{}
+}
diff --git a/src/internal/singleflight/singleflight.go b/src/internal/singleflight/singleflight.go
new file mode 100644
index 0000000..b2d82e2
--- /dev/null
+++ b/src/internal/singleflight/singleflight.go
@@ -0,0 +1,123 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package singleflight provides a duplicate function call suppression
+// mechanism.
+package singleflight
+
+import "sync"
+
+// call is an in-flight or completed singleflight.Do call.
+type call struct {
+	wg sync.WaitGroup // incremented when the call is created, done once fn has returned
+
+	// These fields are written once before the WaitGroup is done
+	// and are only read after the WaitGroup is done.
+	val interface{} // first result of fn
+	err error       // second result of fn
+
+	// These fields are read and written with the singleflight
+	// mutex held before the WaitGroup is done, and are read but
+	// not written after the WaitGroup is done.
+	dups  int             // number of duplicate callers that joined this call
+	chans []chan<- Result // result channels handed out by DoChan
+}
+
+// Group represents a class of work and forms a namespace in
+// which units of work can be executed with duplicate suppression.
+type Group struct {
+	mu sync.Mutex       // protects m and the dups/chans fields of the calls in m
+	m  map[string]*call // lazily initialized by Do and DoChan
+}
+
+// Result holds the results of Do, so they can be passed
+// on a channel, as DoChan does.
+type Result struct {
+	Val    interface{} // value returned by the function
+	Err    error       // error returned by the function
+	Shared bool        // whether the call had duplicate callers
+}
+
+// Do executes and returns the results of the given function, making
+// sure that only one execution is in-flight for a given key at a
+// time. If a duplicate comes in, the duplicate caller waits for the
+// original to complete and receives the same results.
+// The return value shared indicates whether v was given to multiple callers.
+func (g *Group) Do(key string, fn func() (interface{}, error)) (v interface{}, err error, shared bool) {
+	g.mu.Lock()
+	if g.m == nil {
+		g.m = make(map[string]*call)
+	}
+	existing, inFlight := g.m[key]
+	if inFlight {
+		// Duplicate: register, then wait outside the lock for the
+		// original call to finish.
+		existing.dups++
+		g.mu.Unlock()
+		existing.wg.Wait()
+		return existing.val, existing.err, true
+	}
+
+	// First caller for this key: publish the call and run fn here.
+	leader := new(call)
+	leader.wg.Add(1)
+	g.m[key] = leader
+	g.mu.Unlock()
+
+	g.doCall(leader, key, fn)
+	return leader.val, leader.err, leader.dups > 0
+}
+
+// DoChan is like Do but returns a channel that will receive the
+// results when they are ready. The second result is true if the function
+// will eventually be called, false if it will not (because there is
+// a pending request with this key).
+func (g *Group) DoChan(key string, fn func() (interface{}, error)) (<-chan Result, bool) {
+	// Buffered so that doCall can deliver without a receiver waiting.
+	out := make(chan Result, 1)
+
+	g.mu.Lock()
+	if g.m == nil {
+		g.m = make(map[string]*call)
+	}
+	if pending, found := g.m[key]; found {
+		pending.dups++
+		pending.chans = append(pending.chans, out)
+		g.mu.Unlock()
+		return out, false
+	}
+	leader := &call{chans: []chan<- Result{out}}
+	leader.wg.Add(1)
+	g.m[key] = leader
+	g.mu.Unlock()
+
+	go g.doCall(leader, key, fn)
+	return out, true
+}
+
+// doCall handles the single call for a key: it runs fn, records the
+// results in c, and then, with the Group mutex held, marks c as done,
+// removes the key from the map and delivers the results to every
+// channel registered through DoChan.
+func (g *Group) doCall(c *call, key string, fn func() (interface{}, error)) {
+	c.val, c.err = fn()
+
+	g.mu.Lock()
+	// Signal completion under the lock so that it happens together with
+	// the map cleanup as far as other Group operations are concerned.
+	// Waiters in Do release the mutex before calling Wait, so this
+	// cannot deadlock.
+	c.wg.Done()
+	// The key may have been forgotten by ForgetUnshared while fn was
+	// running and then reused by a newer call. Only delete the entry
+	// when it still belongs to this call, so that a newer in-flight
+	// call is not removed by mistake.
+	if g.m[key] == c {
+		delete(g.m, key)
+	}
+	for _, ch := range c.chans {
+		ch <- Result{c.val, c.err, c.dups > 0}
+	}
+	g.mu.Unlock()
+}
+
+// ForgetUnshared tells the singleflight to forget about a key if it is not
+// shared with any other goroutines. Future calls to Do for a forgotten key
+// will call the function rather than waiting for an earlier call to complete.
+// Returns whether the key was forgotten or unknown--that is, whether no
+// other goroutines are waiting for the result.
+func (g *Group) ForgetUnshared(key string) bool {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	c, known := g.m[key]
+	switch {
+	case !known:
+		return true
+	case c.dups != 0:
+		return false
+	}
+	delete(g.m, key)
+	return true
+}
diff --git a/src/internal/singleflight/singleflight_test.go b/src/internal/singleflight/singleflight_test.go
new file mode 100644
index 0000000..6404a17
--- /dev/null
+++ b/src/internal/singleflight/singleflight_test.go
@@ -0,0 +1,87 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package singleflight
+
+import (
+	"errors"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestDo checks that a single Do call hands back the value produced by
+// fn together with a nil error.
+func TestDo(t *testing.T) {
+	var g Group
+	fn := func() (interface{}, error) {
+		return "bar", nil
+	}
+	v, err, _ := g.Do("key", fn)
+
+	const want = "bar (string)"
+	if got := fmt.Sprintf("%v (%T)", v, v); got != want {
+		t.Errorf("Do = %v; want %v", got, want)
+	}
+	if err != nil {
+		t.Errorf("Do error = %v", err)
+	}
+}
+
+// TestDoErr checks that Do passes the error returned by fn through
+// unchanged and reports a nil value alongside it.
+func TestDoErr(t *testing.T) {
+	var g Group
+	someErr := errors.New("some error")
+	failing := func() (interface{}, error) {
+		return nil, someErr
+	}
+
+	v, err, _ := g.Do("key", failing)
+	if v != nil {
+		t.Errorf("unexpected non-nil value %#v", v)
+	}
+	if err != someErr {
+		t.Errorf("Do error = %v; want someErr %v", err, someErr)
+	}
+}
+
+func TestDoDupSuppress(t *testing.T) { // n concurrent Do calls on one key must run fn at least once but fewer than n times
+	var g Group
+	var wg1, wg2 sync.WaitGroup
+	c := make(chan string, 1)
+	var calls int32 // number of times fn has started; updated atomically
+	fn := func() (interface{}, error) {
+		if atomic.AddInt32(&calls, 1) == 1 {
+			// First invocation.
+			wg1.Done()
+		}
+		v := <-c
+		c <- v // pump; make available for any future calls
+
+		time.Sleep(10 * time.Millisecond) // let more goroutines enter Do
+
+		return v, nil
+	}
+
+	const n = 10
+	wg1.Add(1) // released by the first invocation of fn
+	for i := 0; i < n; i++ {
+		wg1.Add(1)
+		wg2.Add(1)
+		go func() {
+			defer wg2.Done()
+			wg1.Done() // this goroutine has reached the line before Do
+			v, err, _ := g.Do("key", fn)
+			if err != nil {
+				t.Errorf("Do error: %v", err)
+				return
+			}
+			if s, _ := v.(string); s != "bar" {
+				t.Errorf("Do = %T %v; want %q", v, v, "bar")
+			}
+		}()
+	}
+	wg1.Wait()
+	// At least one goroutine is in fn now and all of them have at
+	// least reached the line before the Do.
+	c <- "bar"
+	wg2.Wait() // every goroutine has returned from Do
+	if got := atomic.LoadInt32(&calls); got <= 0 || got >= n {
+		t.Errorf("number of calls = %d; want over 0 and less than %d", got, n)
+	}
+}
diff --git a/src/internal/syscall/execenv/execenv_default.go b/src/internal/syscall/execenv/execenv_default.go
new file mode 100644
index 0000000..73289f1
--- /dev/null
+++ b/src/internal/syscall/execenv/execenv_default.go
@@ -0,0 +1,20 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !windows
+// +build !windows
+
+package execenv
+
+import "syscall"
+
+// Default will return the default environment
+// variables based on the process attributes
+// provided.
+//
+// On all platforms other than Windows the attributes are
+// ignored and the result is syscall.Environ() with a nil error.
+func Default(sys *syscall.SysProcAttr) ([]string, error) {
+	env := syscall.Environ()
+	return env, nil
+}
diff --git a/src/internal/syscall/execenv/execenv_windows.go b/src/internal/syscall/execenv/execenv_windows.go
new file mode 100644
index 0000000..6c06549
--- /dev/null
+++ b/src/internal/syscall/execenv/execenv_windows.go
@@ -0,0 +1,55 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build windows
+// +build windows
+
+package execenv
+
+import (
+	"internal/syscall/windows"
+	"syscall"
+	"unicode/utf16"
+	"unsafe"
+)
+
+// Default will return the default environment
+// variables based on the process attributes
+// provided.
+//
+// If the process attributes contain a token, then
+// the environment variables will be sourced from
+// the defaults for that user token, otherwise (sys is nil
+// or has a zero Token) they come from syscall.Environ().
+func Default(sys *syscall.SysProcAttr) (env []string, err error) {
+	if sys == nil || sys.Token == 0 {
+		return syscall.Environ(), nil
+	}
+	var block *uint16
+	err = windows.CreateEnvironmentBlock(&block, sys.Token, false)
+	if err != nil {
+		return nil, err
+	}
+	defer windows.DestroyEnvironmentBlock(block)
+	blockp := uintptr(unsafe.Pointer(block)) // address of the entry currently being decoded
+	for {
+
+		// find NUL terminator
+		end := unsafe.Pointer(blockp)
+		for *(*uint16)(end) != 0 {
+			end = unsafe.Pointer(uintptr(end) + 2) // advance by one uint16 (2 bytes)
+		}
+
+		n := (uintptr(end) - uintptr(unsafe.Pointer(blockp))) / 2 // entry length in uint16 units, terminator excluded
+		if n == 0 {
+			// environment block ends with empty string
+			break
+		}
+
+		entry := (*[(1 << 30) - 1]uint16)(unsafe.Pointer(blockp))[:n:n] // view the n units as a slice with length and capacity n
+		env = append(env, string(utf16.Decode(entry)))
+		blockp += 2 * (uintptr(len(entry)) + 1) // step over the entry and its terminator
+	}
+	return
+}
diff --git a/src/internal/syscall/unix/asm_aix_ppc64.s b/src/internal/syscall/unix/asm_aix_ppc64.s
new file mode 100644
index 0000000..9e82e3e
--- /dev/null
+++ b/src/internal/syscall/unix/asm_aix_ppc64.s
@@ -0,0 +1,12 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// System calls for aix/ppc64 are implemented in syscall/syscall_aix.go.
+// The local syscall6 symbol below is only a jump to syscall·syscall6,
+// declared NOSPLIT with a zero-size frame ($0).
+
+TEXT ·syscall6(SB),NOSPLIT,$0
+	JMP	syscall·syscall6(SB)
diff --git a/src/internal/syscall/unix/asm_darwin.s b/src/internal/syscall/unix/asm_darwin.s
new file mode 100644
index 0000000..8fbdc1d
--- /dev/null
+++ b/src/internal/syscall/unix/asm_darwin.s
@@ -0,0 +1,8 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT ·libc_getentropy_trampoline(SB),NOSPLIT,$0-0 // trampoline with no frame and no arguments
+	JMP	libc_getentropy(SB) // jump straight to the libc_getentropy symbol
diff --git a/src/internal/syscall/unix/asm_solaris.s b/src/internal/syscall/unix/asm_solaris.s
new file mode 100644
index 0000000..2057338
--- /dev/null
+++ b/src/internal/syscall/unix/asm_solaris.s
@@ -0,0 +1,10 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// System calls for Solaris are implemented in runtime/syscall_solaris.go; syscall6 below only jumps to syscall·sysvicall6.
+
+TEXT ·syscall6(SB),NOSPLIT,$0-88
+	JMP	syscall·sysvicall6(SB)
diff --git a/src/internal/syscall/unix/at.go b/src/internal/syscall/unix/at.go
new file mode 100644
index 0000000..9b08864
--- /dev/null
+++ b/src/internal/syscall/unix/at.go
@@ -0,0 +1,59 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux || openbsd || netbsd || dragonfly
+// +build linux openbsd netbsd dragonfly
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+func Unlinkat(dirfd int, path string, flags int) error {
+	var p *byte
+	p, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return err
+	}
+
+	_, _, errno := syscall.Syscall(unlinkatTrap, uintptr(dirfd), uintptr(unsafe.Pointer(p)), uintptr(flags))
+	if errno != 0 {
+		return errno
+	}
+
+	return nil
+}
+
+func Openat(dirfd int, path string, flags int, perm uint32) (int, error) {
+	var p *byte
+	p, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return 0, err
+	}
+
+	fd, _, errno := syscall.Syscall6(openatTrap, uintptr(dirfd), uintptr(unsafe.Pointer(p)), uintptr(flags), uintptr(perm), 0, 0)
+	if errno != 0 {
+		return 0, errno
+	}
+
+	return int(fd), nil
+}
+
+func Fstatat(dirfd int, path string, stat *syscall.Stat_t, flags int) error {
+	var p *byte
+	p, err := syscall.BytePtrFromString(path)
+	if err != nil {
+		return err
+	}
+
+	_, _, errno := syscall.Syscall6(fstatatTrap, uintptr(dirfd), uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(stat)), uintptr(flags), 0, 0)
+	if errno != 0 {
+		return errno
+	}
+
+	return nil
+
+}
diff --git a/src/internal/syscall/unix/at_aix.go b/src/internal/syscall/unix/at_aix.go
new file mode 100644
index 0000000..425df98
--- /dev/null
+++ b/src/internal/syscall/unix/at_aix.go
@@ -0,0 +1,14 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+//go:cgo_import_dynamic libc_fstatat fstatat "libc.a/shr_64.o"
+//go:cgo_import_dynamic libc_openat openat "libc.a/shr_64.o"
+//go:cgo_import_dynamic libc_unlinkat unlinkat "libc.a/shr_64.o"
+
+const ( // AIX values; NOTE(review): presumably the flags passed to Unlinkat and Fstatat — confirm against callers
+	AT_REMOVEDIR        = 0x1
+	AT_SYMLINK_NOFOLLOW = 0x1
+)
diff --git a/src/internal/syscall/unix/at_darwin.go b/src/internal/syscall/unix/at_darwin.go
new file mode 100644
index 0000000..a88a27e
--- /dev/null
+++ b/src/internal/syscall/unix/at_darwin.go
@@ -0,0 +1,31 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import (
+	"syscall"
+	_ "unsafe" // for linkname
+)
+
+// Unlinkat forwards to the unexported unlinkat, which this file binds
+// to syscall.unlinkat with go:linkname, and returns its error unchanged.
+func Unlinkat(dirfd int, path string, flags int) error {
+	err := unlinkat(dirfd, path, flags)
+	return err
+}
+
+// Openat forwards to the unexported openat, which this file binds to
+// syscall.openat with go:linkname, and returns its results unchanged.
+func Openat(dirfd int, path string, flags int, perm uint32) (int, error) {
+	fd, err := openat(dirfd, path, flags, perm)
+	return fd, err
+}
+
+// Fstatat forwards to the unexported fstatat, which this file binds to
+// syscall.fstatat with go:linkname, and returns its error unchanged.
+func Fstatat(dirfd int, path string, stat *syscall.Stat_t, flags int) error {
+	err := fstatat(dirfd, path, stat, flags)
+	return err
+}
+
+//go:linkname unlinkat syscall.unlinkat
+func unlinkat(dirfd int, path string, flags int) error // no body here: linked to syscall.unlinkat
+
+//go:linkname openat syscall.openat
+func openat(dirfd int, path string, flags int, perm uint32) (int, error) // no body here: linked to syscall.openat
+
+//go:linkname fstatat syscall.fstatat
+func fstatat(dirfd int, path string, stat *syscall.Stat_t, flags int) error // no body here: linked to syscall.fstatat
diff --git a/src/internal/syscall/unix/at_freebsd.go b/src/internal/syscall/unix/at_freebsd.go
new file mode 100644
index 0000000..e171f4d
--- /dev/null
+++ b/src/internal/syscall/unix/at_freebsd.go
@@ -0,0 +1,47 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+const ( // FreeBSD values; NOTE(review): presumably the flags passed to Unlinkat and Fstatat — confirm against callers
+	AT_REMOVEDIR        = 0x800
+	AT_SYMLINK_NOFOLLOW = 0x200
+)
+
+// Unlinkat issues syscall.SYS_UNLINKAT with dirfd, a pointer to the
+// byte form of path, and flags. It returns the error from converting
+// path, or the errno of the call when that is non-zero.
+func Unlinkat(dirfd int, path string, flags int) error {
+	pathPtr, convErr := syscall.BytePtrFromString(path)
+	if convErr != nil {
+		return convErr
+	}
+
+	// The unsafe.Pointer to uintptr conversion stays inside the call's
+	// argument list.
+	if _, _, errno := syscall.Syscall(syscall.SYS_UNLINKAT, uintptr(dirfd), uintptr(unsafe.Pointer(pathPtr)), uintptr(flags)); errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+// Openat issues syscall.SYS_OPENAT with dirfd, a pointer to the byte
+// form of path, flags and perm. On success it returns the first result
+// of the call as the file descriptor; on failure it returns 0 and the
+// path conversion error or the errno.
+func Openat(dirfd int, path string, flags int, perm uint32) (int, error) {
+	pathPtr, convErr := syscall.BytePtrFromString(path)
+	if convErr != nil {
+		return 0, convErr
+	}
+
+	// The unsafe.Pointer to uintptr conversion stays inside the call's
+	// argument list.
+	r1, _, errno := syscall.Syscall6(syscall.SYS_OPENAT, uintptr(dirfd), uintptr(unsafe.Pointer(pathPtr)), uintptr(flags), uintptr(perm), 0, 0)
+	if errno == 0 {
+		return int(r1), nil
+	}
+	return 0, errno
+}
+
+// Fstatat forwards to syscall.Fstatat and returns its error unchanged.
+func Fstatat(dirfd int, path string, stat *syscall.Stat_t, flags int) error {
+	err := syscall.Fstatat(dirfd, path, stat, flags)
+	return err
+}
diff --git a/src/internal/syscall/unix/at_libc.go b/src/internal/syscall/unix/at_libc.go
new file mode 100644
index 0000000..4cc351e
--- /dev/null
+++ b/src/internal/syscall/unix/at_libc.go
@@ -0,0 +1,65 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || solaris
+// +build aix solaris
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+//go:linkname procFstatat libc_fstatat
+//go:linkname procOpenat libc_openat
+//go:linkname procUnlinkat libc_unlinkat
+
+var ( // linked by the directives above to libc_fstatat, libc_openat and libc_unlinkat; their addresses are passed to syscall6
+	procFstatat,
+	procOpenat,
+	procUnlinkat uintptr
+)
+
+// Unlinkat calls syscall6 with the address of procUnlinkat, an argument
+// count of 3, and dirfd, a pointer to the byte form of path, and flags.
+// It returns the error from converting path, or the errno of the call
+// when that is non-zero.
+func Unlinkat(dirfd int, path string, flags int) error {
+	pathPtr, convErr := syscall.BytePtrFromString(path)
+	if convErr != nil {
+		return convErr
+	}
+
+	// The unsafe.Pointer to uintptr conversions stay inside the call's
+	// argument list.
+	if _, _, errno := syscall6(uintptr(unsafe.Pointer(&procUnlinkat)), 3, uintptr(dirfd), uintptr(unsafe.Pointer(pathPtr)), uintptr(flags), 0, 0, 0); errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+// Openat calls syscall6 with the address of procOpenat, an argument
+// count of 4, and dirfd, a pointer to the byte form of path, flags and
+// perm. On success it returns the first result of the call as the file
+// descriptor; on failure it returns 0 and the path conversion error or
+// the errno.
+func Openat(dirfd int, path string, flags int, perm uint32) (int, error) {
+	pathPtr, convErr := syscall.BytePtrFromString(path)
+	if convErr != nil {
+		return 0, convErr
+	}
+
+	// The unsafe.Pointer to uintptr conversions stay inside the call's
+	// argument list.
+	r1, _, errno := syscall6(uintptr(unsafe.Pointer(&procOpenat)), 4, uintptr(dirfd), uintptr(unsafe.Pointer(pathPtr)), uintptr(flags), uintptr(perm), 0, 0)
+	if errno == 0 {
+		return int(r1), nil
+	}
+	return 0, errno
+}
+
+// Fstatat calls syscall6 with the address of procFstatat, an argument
+// count of 4, and dirfd, a pointer to the byte form of path, the stat
+// buffer and flags. It returns the error from converting path, or the
+// errno of the call when that is non-zero.
+func Fstatat(dirfd int, path string, stat *syscall.Stat_t, flags int) error {
+	pathPtr, convErr := syscall.BytePtrFromString(path)
+	if convErr != nil {
+		return convErr
+	}
+
+	// The unsafe.Pointer to uintptr conversions stay inside the call's
+	// argument list.
+	if _, _, errno := syscall6(uintptr(unsafe.Pointer(&procFstatat)), 4, uintptr(dirfd), uintptr(unsafe.Pointer(pathPtr)), uintptr(unsafe.Pointer(stat)), uintptr(flags), 0, 0); errno != 0 {
+		return errno
+	}
+	return nil
+}
diff --git a/src/internal/syscall/unix/at_solaris.go b/src/internal/syscall/unix/at_solaris.go
new file mode 100644
index 0000000..e917c4f
--- /dev/null
+++ b/src/internal/syscall/unix/at_solaris.go
@@ -0,0 +1,19 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import "syscall"
+
+// syscall6 is declared without a Go body. Implemented as sysvicall6 in runtime/syscall_solaris.go.
+func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+
+//go:cgo_import_dynamic libc_fstatat fstatat "libc.so"
+//go:cgo_import_dynamic libc_openat openat "libc.so"
+//go:cgo_import_dynamic libc_unlinkat unlinkat "libc.so"
+
+const ( // Solaris values; NOTE(review): presumably the flags passed to Unlinkat and Fstatat — confirm against callers
+	AT_REMOVEDIR        = 0x1
+	AT_SYMLINK_NOFOLLOW = 0x1000
+)
diff --git a/src/internal/syscall/unix/at_sysnum_darwin.go b/src/internal/syscall/unix/at_sysnum_darwin.go
new file mode 100644
index 0000000..aaaaa47
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_darwin.go
@@ -0,0 +1,8 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const AT_REMOVEDIR = 0x80          // unlinkat flag: remove a directory (Darwin value)
+const AT_SYMLINK_NOFOLLOW = 0x0020 // flag: do not follow a trailing symlink (Darwin value)
diff --git a/src/internal/syscall/unix/at_sysnum_dragonfly.go b/src/internal/syscall/unix/at_sysnum_dragonfly.go
new file mode 100644
index 0000000..cec9abc
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_dragonfly.go
@@ -0,0 +1,14 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import "syscall"
+
+const unlinkatTrap uintptr = syscall.SYS_UNLINKAT // unlinkat system call number
+const openatTrap uintptr = syscall.SYS_OPENAT     // openat system call number
+const fstatatTrap uintptr = syscall.SYS_FSTATAT   // fstatat system call number
+
+const AT_REMOVEDIR = 0x2        // unlinkat flag: remove a directory (DragonFly BSD value)
+const AT_SYMLINK_NOFOLLOW = 0x1 // flag: do not follow a trailing symlink (DragonFly BSD value)
diff --git a/src/internal/syscall/unix/at_sysnum_fstatat64_linux.go b/src/internal/syscall/unix/at_sysnum_fstatat64_linux.go
new file mode 100644
index 0000000..050d401
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_fstatat64_linux.go
@@ -0,0 +1,12 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm || mips || mipsle || 386
+// +build arm mips mipsle 386
+
+package unix
+
+import "syscall"
+
+const fstatatTrap uintptr = syscall.SYS_FSTATAT64 // arm, mips, mipsle and 386 use the fstatat64 system call number
diff --git a/src/internal/syscall/unix/at_sysnum_fstatat_linux.go b/src/internal/syscall/unix/at_sysnum_fstatat_linux.go
new file mode 100644
index 0000000..e53a2d1
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_fstatat_linux.go
@@ -0,0 +1,12 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build arm64 || riscv64
+// +build arm64 riscv64
+
+package unix
+
+import "syscall"
+
+const fstatatTrap uintptr = syscall.SYS_FSTATAT // arm64 and riscv64 use the plain fstatat system call number
diff --git a/src/internal/syscall/unix/at_sysnum_linux.go b/src/internal/syscall/unix/at_sysnum_linux.go
new file mode 100644
index 0000000..fa7cd75
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_linux.go
@@ -0,0 +1,13 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import "syscall"
+
+const unlinkatTrap uintptr = syscall.SYS_UNLINKAT // unlinkat system call number
+const openatTrap uintptr = syscall.SYS_OPENAT     // openat system call number
+
+const AT_REMOVEDIR = 0x200        // unlinkat flag: remove a directory (Linux value)
+const AT_SYMLINK_NOFOLLOW = 0x100 // flag: do not follow a trailing symlink (Linux value)
diff --git a/src/internal/syscall/unix/at_sysnum_netbsd.go b/src/internal/syscall/unix/at_sysnum_netbsd.go
new file mode 100644
index 0000000..fe45e29
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_netbsd.go
@@ -0,0 +1,14 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import "syscall"
+
+const unlinkatTrap uintptr = syscall.SYS_UNLINKAT // unlinkat system call number
+const openatTrap uintptr = syscall.SYS_OPENAT     // openat system call number
+const fstatatTrap uintptr = syscall.SYS_FSTATAT   // fstatat system call number
+
+const AT_REMOVEDIR = 0x800        // unlinkat flag: remove a directory (NetBSD value)
+const AT_SYMLINK_NOFOLLOW = 0x200 // flag: do not follow a trailing symlink (NetBSD value)
diff --git a/src/internal/syscall/unix/at_sysnum_newfstatat_linux.go b/src/internal/syscall/unix/at_sysnum_newfstatat_linux.go
new file mode 100644
index 0000000..4cb4a59
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_newfstatat_linux.go
@@ -0,0 +1,12 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 || mips64 || mips64le || ppc64 || ppc64le || s390x
+// +build amd64 mips64 mips64le ppc64 ppc64le s390x
+
+package unix
+
+import "syscall"
+
+const fstatatTrap uintptr = syscall.SYS_NEWFSTATAT // amd64, mips64(le), ppc64(le) and s390x use the newfstatat system call number
diff --git a/src/internal/syscall/unix/at_sysnum_openbsd.go b/src/internal/syscall/unix/at_sysnum_openbsd.go
new file mode 100644
index 0000000..c2d48b9
--- /dev/null
+++ b/src/internal/syscall/unix/at_sysnum_openbsd.go
@@ -0,0 +1,14 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import "syscall"
+
+const unlinkatTrap uintptr = syscall.SYS_UNLINKAT // unlinkat system call number
+const openatTrap uintptr = syscall.SYS_OPENAT     // openat system call number
+const fstatatTrap uintptr = syscall.SYS_FSTATAT   // fstatat system call number
+
+const AT_REMOVEDIR = 0x08        // unlinkat flag: remove a directory (OpenBSD value)
+const AT_SYMLINK_NOFOLLOW = 0x02 // flag: do not follow a trailing symlink (OpenBSD value)
diff --git a/src/internal/syscall/unix/copy_file_range_linux.go b/src/internal/syscall/unix/copy_file_range_linux.go
new file mode 100644
index 0000000..cf0a279
--- /dev/null
+++ b/src/internal/syscall/unix/copy_file_range_linux.go
@@ -0,0 +1,26 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// CopyFileRange issues the system call numbered copyFileRangeTrap with
+// rfd, roff, wfd, woff, len and flags as its six arguments.
+//
+// n is always the raw first result of the call converted to int, even when
+// err is non-nil; err is the reported errno, or nil when the errno is zero.
+func CopyFileRange(rfd int, roff *int64, wfd int, woff *int64, len int, flags int) (n int, err error) {
+	res, _, e1 := syscall.Syscall6(copyFileRangeTrap, uintptr(rfd), uintptr(unsafe.Pointer(roff)), uintptr(wfd), uintptr(unsafe.Pointer(woff)), uintptr(len), uintptr(flags))
+	if e1 == 0 {
+		return int(res), nil
+	}
+	return int(res), e1
+}
diff --git a/src/internal/syscall/unix/fcntl_linux_32bit.go b/src/internal/syscall/unix/fcntl_linux_32bit.go
new file mode 100644
index 0000000..46a4f6b
--- /dev/null
+++ b/src/internal/syscall/unix/fcntl_linux_32bit.go
@@ -0,0 +1,17 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// On 32-bit Linux systems, use SYS_FCNTL64.
+// If you change the build tags here, see syscall/flock_linux_32bit.go.
+
+//go:build (linux && 386) || (linux && arm) || (linux && mips) || (linux && mipsle)
+// +build linux,386 linux,arm linux,mips linux,mipsle
+
+package unix
+
+import "syscall"
+
+func init() {
+	FcntlSyscall = syscall.SYS_FCNTL64 // 32-bit Linux: switch the fcntl trap to fcntl64, per the note at the top of this file
+}
diff --git a/src/internal/syscall/unix/getentropy_darwin.go b/src/internal/syscall/unix/getentropy_darwin.go
new file mode 100644
index 0000000..c75006b
--- /dev/null
+++ b/src/internal/syscall/unix/getentropy_darwin.go
@@ -0,0 +1,33 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin && !ios
+// +build darwin,!ios
+
+package unix
+
+import (
+	"internal/abi"
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_getentropy getentropy "/usr/lib/libSystem.B.dylib"
+
+func libc_getentropy_trampoline()
+
+// GetEntropy calls the macOS getentropy system call.
+//
+// The call goes to libc's getentropy through libc_getentropy_trampoline,
+// passing the address of p's first byte and len(p). An empty p is treated
+// as a successful no-op: there is no first element to take the address of,
+// so the call is skipped instead of panicking on &p[0]. A non-zero errno
+// from the call is returned as the error.
+func GetEntropy(p []byte) error {
+	if len(p) == 0 {
+		return nil
+	}
+	_, _, errno := syscall_syscall(abi.FuncPCABI0(libc_getentropy_trampoline),
+		uintptr(unsafe.Pointer(&p[0])),
+		uintptr(len(p)),
+		0)
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+//go:linkname syscall_syscall syscall.syscall
+func syscall_syscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
diff --git a/src/internal/syscall/unix/getentropy_openbsd.go b/src/internal/syscall/unix/getentropy_openbsd.go
new file mode 100644
index 0000000..d5caa80
--- /dev/null
+++ b/src/internal/syscall/unix/getentropy_openbsd.go
@@ -0,0 +1,25 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// entropyTrap is getentropy(2)'s syscall number, from
+// /usr/src/sys/kern/syscalls.master.
+const entropyTrap uintptr = 7
+
+// GetEntropy calls the OpenBSD getentropy system call.
+//
+// The address of p's first byte and len(p) are passed to the call. An empty
+// p is treated as a successful no-op: there is no first element to take the
+// address of, so the system call is skipped instead of panicking on &p[0].
+// A non-zero errno from the call is returned as the error.
+func GetEntropy(p []byte) error {
+	if len(p) == 0 {
+		return nil
+	}
+	_, _, errno := syscall.Syscall(entropyTrap,
+		uintptr(unsafe.Pointer(&p[0])),
+		uintptr(len(p)),
+		0)
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
diff --git a/src/internal/syscall/unix/getrandom.go b/src/internal/syscall/unix/getrandom.go
new file mode 100644
index 0000000..d2c58c0
--- /dev/null
+++ b/src/internal/syscall/unix/getrandom.go
@@ -0,0 +1,40 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || linux
+// +build dragonfly freebsd linux
+
+package unix
+
+import (
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+)
+
+// getrandomUnsupported becomes 1 once the system call has answered ENOSYS.
+// It is only accessed through sync/atomic.
+var getrandomUnsupported int32
+
+// GetRandomFlag is a flag supported by the getrandom system call.
+type GetRandomFlag uintptr
+
+// GetRandom calls the getrandom system call with p as the buffer and flags
+// as the flag argument, returning the call's first result as n.
+//
+// An empty p returns (0, nil) without making the call. After the call has
+// reported ENOSYS once, every later invocation returns ENOSYS immediately.
+func GetRandom(p []byte, flags GetRandomFlag) (n int, err error) {
+	switch {
+	case len(p) == 0:
+		return 0, nil
+	case atomic.LoadInt32(&getrandomUnsupported) != 0:
+		return 0, syscall.ENOSYS
+	}
+
+	got, _, e1 := syscall.Syscall(getrandomTrap, uintptr(unsafe.Pointer(&p[0])), uintptr(len(p)), uintptr(flags))
+	switch e1 {
+	case 0:
+		return int(got), nil
+	case syscall.ENOSYS:
+		// Remember the missing system call so later calls can skip it.
+		atomic.StoreInt32(&getrandomUnsupported, 1)
+	}
+	return 0, e1
+}
diff --git a/src/internal/syscall/unix/getrandom_dragonfly.go b/src/internal/syscall/unix/getrandom_dragonfly.go
new file mode 100644
index 0000000..fbf78f9
--- /dev/null
+++ b/src/internal/syscall/unix/getrandom_dragonfly.go
@@ -0,0 +1,16 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+// getrandomTrap is the DragonFlyBSD getrandom system call number.
+const getrandomTrap uintptr = 550
+
+const (
+	// GRND_RANDOM is only defined for portability; it is a no-op on DragonFlyBSD.
+	GRND_RANDOM GetRandomFlag = 0x0001
+
+	// GRND_NONBLOCK means return EAGAIN rather than blocking.
+	GRND_NONBLOCK GetRandomFlag = 0x0002
+)
diff --git a/src/internal/syscall/unix/getrandom_freebsd.go b/src/internal/syscall/unix/getrandom_freebsd.go
new file mode 100644
index 0000000..8c4f3df
--- /dev/null
+++ b/src/internal/syscall/unix/getrandom_freebsd.go
@@ -0,0 +1,16 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+// getrandomTrap is the FreeBSD getrandom system call number.
+const getrandomTrap uintptr = 563
+
+const (
+	// GRND_NONBLOCK means return EAGAIN rather than blocking.
+	GRND_NONBLOCK GetRandomFlag = 0x0001
+
+	// GRND_RANDOM is only defined for portability; it is a no-op on FreeBSD.
+	GRND_RANDOM GetRandomFlag = 0x0002
+)
diff --git a/src/internal/syscall/unix/getrandom_linux.go b/src/internal/syscall/unix/getrandom_linux.go
new file mode 100644
index 0000000..8ccd8d3
--- /dev/null
+++ b/src/internal/syscall/unix/getrandom_linux.go
@@ -0,0 +1,13 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const (
+	// GRND_NONBLOCK means return EAGAIN rather than blocking.
+	GRND_NONBLOCK GetRandomFlag = 0x0001
+
+	// GRND_RANDOM means use the /dev/random pool instead of /dev/urandom.
+	GRND_RANDOM GetRandomFlag = 0x0002
+)
diff --git a/src/internal/syscall/unix/getrandom_solaris.go b/src/internal/syscall/unix/getrandom_solaris.go
new file mode 100644
index 0000000..d86775c
--- /dev/null
+++ b/src/internal/syscall/unix/getrandom_solaris.go
@@ -0,0 +1,53 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import (
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_getrandom getrandom "libc.so"
+
+//go:linkname procGetrandom libc_getrandom
+
+var procGetrandom uintptr
+
+var getrandomUnsupported int32 // atomic
+
+// GetRandomFlag is a flag supported by the getrandom system call.
+type GetRandomFlag uintptr
+
+const (
+	// GRND_NONBLOCK means return EAGAIN rather than blocking.
+	GRND_NONBLOCK GetRandomFlag = 0x0001
+
+	// GRND_RANDOM means use the /dev/random pool instead of /dev/urandom.
+	GRND_RANDOM GetRandomFlag = 0x0002
+)
+
+// GetRandom calls libc's getrandom (procGetrandom) through syscall6 with p
+// as the buffer and flags as the flag argument, returning the call's first
+// result as n.
+//
+// An empty p returns (0, nil) without making the call. After the call has
+// reported ENOSYS once, every later invocation returns ENOSYS immediately.
+func GetRandom(p []byte, flags GetRandomFlag) (n int, err error) {
+	if len(p) == 0 {
+		return 0, nil
+	}
+	if unsupported := atomic.LoadInt32(&getrandomUnsupported); unsupported != 0 {
+		return 0, syscall.ENOSYS
+	}
+
+	got, _, e1 := syscall6(uintptr(unsafe.Pointer(&procGetrandom)), 3, uintptr(unsafe.Pointer(&p[0])), uintptr(len(p)), uintptr(flags), 0, 0, 0)
+	if e1 == 0 {
+		return int(got), nil
+	}
+	if e1 == syscall.ENOSYS {
+		// Remember the missing function so later calls can skip it.
+		atomic.StoreInt32(&getrandomUnsupported, 1)
+	}
+	return 0, e1
+}
diff --git a/src/internal/syscall/unix/ioctl_aix.go b/src/internal/syscall/unix/ioctl_aix.go
new file mode 100644
index 0000000..19d56c3
--- /dev/null
+++ b/src/internal/syscall/unix/ioctl_aix.go
@@ -0,0 +1,25 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_ioctl ioctl "libc.a/shr_64.o"
+//go:linkname libc_ioctl libc_ioctl
+var libc_ioctl uintptr
+
+// Implemented in syscall/syscall_aix.go.
+func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+
+// Ioctl invokes the libc ioctl entry point (libc_ioctl) through syscall6
+// with fd, cmd and args as its three arguments. It returns the errno of the
+// call as err, or nil when the errno is zero.
+func Ioctl(fd int, cmd int, args uintptr) (err error) {
+	if _, _, errno := syscall6(uintptr(unsafe.Pointer(&libc_ioctl)), 3, uintptr(fd), uintptr(cmd), args, 0, 0, 0); errno != 0 {
+		return errno
+	}
+	return nil
+}
diff --git a/src/internal/syscall/unix/nonblocking.go b/src/internal/syscall/unix/nonblocking.go
new file mode 100644
index 0000000..a22986c
--- /dev/null
+++ b/src/internal/syscall/unix/nonblocking.go
@@ -0,0 +1,22 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build dragonfly || freebsd || linux || netbsd || openbsd
+// +build dragonfly freebsd linux netbsd openbsd
+
+package unix
+
+import "syscall"
+
+// FcntlSyscall is the number for the fcntl system call. This is
+// usually SYS_FCNTL, but can be overridden to SYS_FCNTL64.
+var FcntlSyscall uintptr = syscall.SYS_FCNTL
+
+// IsNonblock reports whether the O_NONBLOCK status flag is set on fd.
+// The flags are read with an F_GETFL request issued through FcntlSyscall;
+// if that call fails, its errno is returned and nonblocking is false.
+func IsNonblock(fd int) (nonblocking bool, err error) {
+	flags, _, errno := syscall.Syscall(FcntlSyscall, uintptr(fd), uintptr(syscall.F_GETFL), 0)
+	if errno == 0 {
+		return flags&syscall.O_NONBLOCK != 0, nil
+	}
+	return false, errno
+}
diff --git a/src/internal/syscall/unix/nonblocking_js.go b/src/internal/syscall/unix/nonblocking_js.go
new file mode 100644
index 0000000..a5a5080
--- /dev/null
+++ b/src/internal/syscall/unix/nonblocking_js.go
@@ -0,0 +1,12 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build js && wasm
+// +build js,wasm
+
+package unix
+
+// IsNonblock is the js/wasm stand-in: it never inspects fd and always
+// reports false with a nil error.
+func IsNonblock(fd int) (nonblocking bool, err error) {
+	// Both named results are deliberately left at their zero values.
+	return nonblocking, err
+}
diff --git a/src/internal/syscall/unix/nonblocking_libc.go b/src/internal/syscall/unix/nonblocking_libc.go
new file mode 100644
index 0000000..d9565ef
--- /dev/null
+++ b/src/internal/syscall/unix/nonblocking_libc.go
@@ -0,0 +1,25 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix || darwin || solaris
+// +build aix darwin solaris
+
+package unix
+
+import (
+	"syscall"
+	_ "unsafe" // for go:linkname
+)
+
+// IsNonblock reports whether the O_NONBLOCK status flag is set on fd.
+// The flags are read with fcntl(fd, F_GETFL, 0); if that call fails, its
+// error is returned and nonblocking is false.
+func IsNonblock(fd int) (nonblocking bool, err error) {
+	flags, e1 := fcntl(fd, syscall.F_GETFL, 0)
+	if e1 == nil {
+		return flags&syscall.O_NONBLOCK != 0, nil
+	}
+	return false, e1
+}
+
+// Implemented in the syscall package.
+//go:linkname fcntl syscall.fcntl
+func fcntl(fd int, cmd int, arg int) (int, error)
diff --git a/src/internal/syscall/unix/pipe2_illumos.go b/src/internal/syscall/unix/pipe2_illumos.go
new file mode 100644
index 0000000..b0aac89
--- /dev/null
+++ b/src/internal/syscall/unix/pipe2_illumos.go
@@ -0,0 +1,35 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build illumos
+// +build illumos
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_pipe2 pipe2 "libc.so"
+
+//go:linkname procpipe2 libc_pipe2
+
+var procpipe2 uintptr
+
+type _C_int int32
+
+// Pipe2 calls libc's pipe2 (procpipe2) through syscall6 with flags and
+// stores the two resulting descriptors in p[0] and p[1].
+//
+// p must have exactly two elements, otherwise EINVAL is returned without
+// making the call. On failure the errno is returned and p is left untouched.
+func Pipe2(p []int, flags int) error {
+	if len(p) != 2 {
+		return syscall.EINVAL
+	}
+
+	// The C function fills an array of two C ints, which is narrower than
+	// Go's int, so a temporary array receives the descriptors first.
+	var fds [2]_C_int
+	if _, _, e1 := syscall6(uintptr(unsafe.Pointer(&procpipe2)), 2, uintptr(unsafe.Pointer(&fds)), uintptr(flags), 0, 0, 0, 0); e1 != 0 {
+		return e1
+	}
+	p[0], p[1] = int(fds[0]), int(fds[1])
+	return nil
+}
diff --git a/src/internal/syscall/unix/sysnum_linux_386.go b/src/internal/syscall/unix/sysnum_linux_386.go
new file mode 100644
index 0000000..2bda08c
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_386.go
@@ -0,0 +1,10 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 355 // getrandom system call number on linux/386
+	copyFileRangeTrap uintptr = 377 // copy_file_range system call number on linux/386
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_amd64.go b/src/internal/syscall/unix/sysnum_linux_amd64.go
new file mode 100644
index 0000000..ae5239e
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_amd64.go
@@ -0,0 +1,10 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 318 // getrandom system call number on linux/amd64
+	copyFileRangeTrap uintptr = 326 // copy_file_range system call number on linux/amd64
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_arm.go b/src/internal/syscall/unix/sysnum_linux_arm.go
new file mode 100644
index 0000000..acaec05
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_arm.go
@@ -0,0 +1,10 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 384 // getrandom system call number on linux/arm
+	copyFileRangeTrap uintptr = 391 // copy_file_range system call number on linux/arm
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_generic.go b/src/internal/syscall/unix/sysnum_linux_generic.go
new file mode 100644
index 0000000..a760254
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_generic.go
@@ -0,0 +1,18 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (arm64 || riscv64)
+// +build linux
+// +build arm64 riscv64
+
+package unix
+
+// This file is named "generic" because at a certain point Linux started
+// standardizing on system call numbers across architectures. So far this
+// means only arm64 and riscv64 use the standard numbers.
+
+const (
+	getrandomTrap     uintptr = 278 // getrandom number in the generic table (linux/arm64, linux/riscv64)
+	copyFileRangeTrap uintptr = 285 // copy_file_range number in the generic table (linux/arm64, linux/riscv64)
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_mips64x.go b/src/internal/syscall/unix/sysnum_linux_mips64x.go
new file mode 100644
index 0000000..f353d4d
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_mips64x.go
@@ -0,0 +1,13 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+// +build mips64 mips64le
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 5313 // getrandom system call number on mips64 and mips64le
+	copyFileRangeTrap uintptr = 5320 // copy_file_range system call number on mips64 and mips64le
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_mipsx.go b/src/internal/syscall/unix/sysnum_linux_mipsx.go
new file mode 100644
index 0000000..4ed4715
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_mipsx.go
@@ -0,0 +1,13 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+// +build mips mipsle
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 4353 // getrandom system call number on mips and mipsle
+	copyFileRangeTrap uintptr = 4360 // copy_file_range system call number on mips and mipsle
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_ppc64x.go b/src/internal/syscall/unix/sysnum_linux_ppc64x.go
new file mode 100644
index 0000000..b484ffe
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_ppc64x.go
@@ -0,0 +1,13 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 359 // getrandom system call number on ppc64 and ppc64le
+	copyFileRangeTrap uintptr = 379 // copy_file_range system call number on ppc64 and ppc64le
+)
diff --git a/src/internal/syscall/unix/sysnum_linux_s390x.go b/src/internal/syscall/unix/sysnum_linux_s390x.go
new file mode 100644
index 0000000..bf2c01e
--- /dev/null
+++ b/src/internal/syscall/unix/sysnum_linux_s390x.go
@@ -0,0 +1,10 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const (
+	getrandomTrap     uintptr = 349 // getrandom system call number on linux/s390x
+	copyFileRangeTrap uintptr = 375 // copy_file_range system call number on linux/s390x
+)
diff --git a/src/internal/syscall/unix/writev_illumos.go b/src/internal/syscall/unix/writev_illumos.go
new file mode 100644
index 0000000..f60949f
--- /dev/null
+++ b/src/internal/syscall/unix/writev_illumos.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build illumos
+// +build illumos
+
+package unix
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_writev writev "libc.so"
+
+//go:linkname procwritev libc_writev
+
+var procwritev uintptr
+
+// Writev calls libc's writev (procwritev) through syscall6, passing fd, a
+// pointer to the first element of iovs and len(iovs). It returns the call's
+// first result, or 0 and the errno when the call fails.
+func Writev(fd int, iovs []syscall.Iovec) (uintptr, error) {
+	// An empty iovs has no first element to point at, so a nil pointer is
+	// passed together with a count of zero.
+	var first *syscall.Iovec
+	if len(iovs) > 0 {
+		first = &iovs[0]
+	}
+
+	written, _, e1 := syscall6(uintptr(unsafe.Pointer(&procwritev)), 3, uintptr(fd), uintptr(unsafe.Pointer(first)), uintptr(len(iovs)), 0, 0, 0)
+	if e1 == 0 {
+		return written, nil
+	}
+	return 0, e1
+}
diff --git a/src/internal/syscall/windows/exec_windows_test.go b/src/internal/syscall/windows/exec_windows_test.go
new file mode 100644
index 0000000..283d7ce
--- /dev/null
+++ b/src/internal/syscall/windows/exec_windows_test.go
@@ -0,0 +1,139 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+package windows_test
+
+import (
+	"fmt"
+	"internal/syscall/windows"
+	"os"
+	"os/exec"
+	"syscall"
+	"testing"
+	"unsafe"
+)
+
+// TestRunAtLowIntegrity re-executes the test binary as a child process
+// started with a token built by getIntegrityLevelToken(sidWilLow), and
+// checks that the child reports sidWilLow as its own integrity level.
+//
+// The same function also serves as the child: when GO_WANT_HELPER_PROCESS
+// is "1" it prints its integrity level SID and exits.
+func TestRunAtLowIntegrity(t *testing.T) {
+	if os.Getenv("GO_WANT_HELPER_PROCESS") == "1" {
+		// Helper mode: report the level on stdout, or fail with exit code 9.
+		level, err := getProcessIntegrityLevel()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "error: %s\n", err.Error())
+			os.Exit(9)
+		}
+		fmt.Printf("%s", level)
+		os.Exit(0)
+	}
+
+	child := exec.Command(os.Args[0], "-test.run=TestRunAtLowIntegrity", "--")
+	child.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
+
+	lowToken, err := getIntegrityLevelToken(sidWilLow)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer lowToken.Close()
+
+	child.SysProcAttr = &syscall.SysProcAttr{Token: lowToken}
+
+	output, err := child.CombinedOutput()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if got := string(output); got != sidWilLow {
+		t.Fatalf("Child process did not run as low integrity level: %s", got)
+	}
+}
+
+const (
+	sidWilLow = `S-1-16-4096` // SID string the test expects for a low-integrity process
+)
+
+// getProcessIntegrityLevel returns the string form of the SID stored in the
+// current process token's TokenIntegrityLevel information.
+func getProcessIntegrityLevel() (string, error) {
+	tok, err := syscall.OpenCurrentProcessToken()
+	if err != nil {
+		return "", err
+	}
+	defer tok.Close()
+
+	// 64 bytes is only the first guess; tokenGetInfo grows the buffer.
+	info, err := tokenGetInfo(tok, syscall.TokenIntegrityLevel, 64)
+	if err != nil {
+		return "", err
+	}
+
+	label := (*windows.TOKEN_MANDATORY_LABEL)(info)
+	return (*syscall.SID)(unsafe.Pointer(label.Label.Sid)).String()
+}
+
+// tokenGetInfo fetches information of the given class for token t and
+// returns a pointer to the start of the buffer holding it.
+//
+// It starts with a buffer of initSize bytes. Whenever the call fails with
+// ERROR_INSUFFICIENT_BUFFER and reports a larger required size, it retries
+// with a buffer of that size. Any other failure is returned as is.
+func tokenGetInfo(t syscall.Token, class uint32, initSize int) (unsafe.Pointer, error) {
+	size := uint32(initSize)
+	for {
+		buf := make([]byte, size)
+		err := syscall.GetTokenInformation(t, class, &buf[0], uint32(len(buf)), &size)
+		switch {
+		case err == nil:
+			return unsafe.Pointer(&buf[0]), nil
+		case err != syscall.ERROR_INSUFFICIENT_BUFFER, size <= uint32(len(buf)):
+			// Either an unrelated error, or the reported size would not
+			// grow the buffer, so retrying could not help.
+			return nil, err
+		}
+	}
+}
+
+// getIntegrityLevelToken duplicates the current process token as a primary
+// token and sets its TokenIntegrityLevel to the SID given in string form by
+// wns. The caller owns the returned token and must close it.
+func getIntegrityLevelToken(wns string) (syscall.Token, error) {
+	const access uint32 = syscall.TOKEN_DUPLICATE |
+		syscall.TOKEN_ADJUST_DEFAULT |
+		syscall.TOKEN_QUERY |
+		syscall.TOKEN_ASSIGN_PRIMARY
+
+	self, err := syscall.GetCurrentProcess()
+	if err != nil {
+		return 0, err
+	}
+	defer syscall.CloseHandle(self)
+
+	var own syscall.Token
+	if err := syscall.OpenProcessToken(self, access, &own); err != nil {
+		return 0, err
+	}
+	defer own.Close()
+
+	sid, err := syscall.StringToSid(wns)
+	if err != nil {
+		return 0, err
+	}
+
+	label := new(windows.TOKEN_MANDATORY_LABEL)
+	label.Label.Attributes = windows.SE_GROUP_INTEGRITY
+	label.Label.Sid = sid
+
+	var dup syscall.Token
+	if err := windows.DuplicateTokenEx(own, 0, nil, windows.SecurityImpersonation, windows.TokenPrimary, &dup); err != nil {
+		return 0, err
+	}
+
+	if err := windows.SetTokenInformation(dup, syscall.TokenIntegrityLevel, uintptr(unsafe.Pointer(label)), label.Size()); err != nil {
+		// The duplicate is of no use without the label; do not leak it.
+		dup.Close()
+		return 0, err
+	}
+	return dup, nil
+}
diff --git a/src/internal/syscall/windows/mksyscall.go b/src/internal/syscall/windows/mksyscall.go
new file mode 100644
index 0000000..599f076
--- /dev/null
+++ b/src/internal/syscall/windows/mksyscall.go
@@ -0,0 +1,9 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build generate
+
+package windows
+
+//go:generate go run ../../../syscall/mksyscall_windows.go -output zsyscall_windows.go syscall_windows.go security_windows.go psapi_windows.go symlink_windows.go
diff --git a/src/internal/syscall/windows/psapi_windows.go b/src/internal/syscall/windows/psapi_windows.go
new file mode 100644
index 0000000..b138e65
--- /dev/null
+++ b/src/internal/syscall/windows/psapi_windows.go
@@ -0,0 +1,20 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package windows
+
+type PROCESS_MEMORY_COUNTERS struct { // passed by pointer to GetProcessMemoryInfo (see the //sys line in this file)
+	CB                         uint32 // presumably the structure size in bytes, as in the Win32 struct of the same name (TODO confirm)
+	PageFaultCount             uint32
+	PeakWorkingSetSize         uintptr
+	WorkingSetSize             uintptr
+	QuotaPeakPagedPoolUsage    uintptr
+	QuotaPagedPoolUsage        uintptr
+	QuotaPeakNonPagedPoolUsage uintptr
+	QuotaNonPagedPoolUsage     uintptr
+	PagefileUsage              uintptr
+	PeakPagefileUsage          uintptr
+}
+
+//sys	GetProcessMemoryInfo(handle syscall.Handle, memCounters *PROCESS_MEMORY_COUNTERS, cb uint32) (err error) = psapi.GetProcessMemoryInfo
diff --git a/src/internal/syscall/windows/registry/export_test.go b/src/internal/syscall/windows/registry/export_test.go
new file mode 100644
index 0000000..8badf6f
--- /dev/null
+++ b/src/internal/syscall/windows/registry/export_test.go
@@ -0,0 +1,11 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+package registry
+
+func (k Key) SetValue(name string, valtype uint32, data []byte) error {
+	return k.setValue(name, valtype, data) // test-only export of the unexported setValue
+}
diff --git a/src/internal/syscall/windows/registry/key.go b/src/internal/syscall/windows/registry/key.go
new file mode 100644
index 0000000..612c48f
--- /dev/null
+++ b/src/internal/syscall/windows/registry/key.go
@@ -0,0 +1,160 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+// Package registry provides access to the Windows registry.
+//
+// Here is a simple example, opening a registry key and reading a string value from it.
+//
+//	k, err := registry.OpenKey(registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	defer k.Close()
+//
+//	s, _, err := k.GetStringValue("SystemRoot")
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	fmt.Printf("Windows system root is %q\n", s)
+//
+// NOTE: This package is a copy of golang.org/x/sys/windows/registry
+// with KeyInfo.ModTime removed to prevent dependency cycles.
+//
+package registry
+
+import "syscall"
+
+const (
+	// Registry key security and access rights.
+	// See https://msdn.microsoft.com/en-us/library/windows/desktop/ms724878.aspx
+	// for details.
+	ALL_ACCESS         = 0xf003f
+	CREATE_LINK        = 0x00020
+	CREATE_SUB_KEY     = 0x00004
+	ENUMERATE_SUB_KEYS = 0x00008
+	EXECUTE            = 0x20019 // same mask as READ
+	NOTIFY             = 0x00010
+	QUERY_VALUE        = 0x00001
+	READ               = 0x20019 // same mask as EXECUTE
+	SET_VALUE          = 0x00002
+	WOW64_32KEY        = 0x00200
+	WOW64_64KEY        = 0x00100
+	WRITE              = 0x20006
+)
+
+// Key is a handle to an open Windows registry key.
+// Keys can be obtained by calling OpenKey; there are
+// also some predefined root keys such as CURRENT_USER.
+// Keys can be used directly in the Windows API.
+type Key syscall.Handle
+
+const (
+	// Windows defines some predefined root keys that are always open.
+	// An application can use these keys as entry points to the registry.
+	// Normally these keys are used in OpenKey to open new keys,
+	// but they can also be used anywhere a Key is required.
+	CLASSES_ROOT   = Key(syscall.HKEY_CLASSES_ROOT)
+	CURRENT_USER   = Key(syscall.HKEY_CURRENT_USER)
+	LOCAL_MACHINE  = Key(syscall.HKEY_LOCAL_MACHINE)
+	USERS          = Key(syscall.HKEY_USERS)
+	CURRENT_CONFIG = Key(syscall.HKEY_CURRENT_CONFIG)
+)
+
+// Close closes the open key k by handing its handle to RegCloseKey.
+func (k Key) Close() error {
+	handle := syscall.Handle(k)
+	return syscall.RegCloseKey(handle)
+}
+
+// OpenKey opens the key named by path, interpreted relative to key k.
+// k may be any open key, including predefined roots such as CURRENT_USER.
+// access specifies the access rights requested for the opened key.
+// It returns the new key, or 0 and an error if path cannot be converted
+// to UTF-16 or the key cannot be opened.
+func OpenKey(k Key, path string, access uint32) (Key, error) {
+	name, err := syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return 0, err
+	}
+
+	var opened syscall.Handle
+	if err := syscall.RegOpenKeyEx(syscall.Handle(k), name, 0, access, &opened); err != nil {
+		return 0, err
+	}
+	return Key(opened), nil
+}
+
+// ReadSubKeyNames returns the names of the subkeys of key k.
+// If enumeration fails part-way, the names collected so far are returned
+// together with the error.
+func (k Key) ReadSubKeyNames() ([]string, error) {
+	names := make([]string, 0)
+	// A key name is limited to 255 characters, as described at
+	// https://msdn.microsoft.com/library/windows/desktop/ms724872.aspx,
+	// so 256 UTF-16 units leave room for the terminating NUL.
+	buf := make([]uint16, 256)
+	for idx := uint32(0); ; idx++ {
+		n := uint32(len(buf))
+	retry:
+		for {
+			switch err := syscall.RegEnumKeyEx(syscall.Handle(k), idx, &buf[0], &n, nil, nil, nil, nil); err {
+			case nil:
+				break retry
+			case syscall.ERROR_MORE_DATA:
+				// Double the buffer size and ask for the same index again.
+				n = uint32(2 * len(buf))
+				buf = make([]uint16, n)
+			case _ERROR_NO_MORE_ITEMS:
+				// Every subkey has been enumerated.
+				return names, nil
+			default:
+				return names, err
+			}
+		}
+		names = append(names, syscall.UTF16ToString(buf[:n]))
+	}
+}
+
+// CreateKey creates a key named path under open key k.
+// CreateKey returns the new key and a boolean flag that reports
+// whether the key already existed.
+// The access parameter specifies the access rights for the key
+// to be created.
+//
+// A path that cannot be converted to UTF-16 (for example one containing
+// a NUL byte) is reported as an error, as OpenKey does, instead of
+// panicking inside the deprecated syscall.StringToUTF16Ptr.
+func CreateKey(k Key, path string, access uint32) (newk Key, openedExisting bool, err error) {
+	p, err := syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return 0, false, err
+	}
+	var h syscall.Handle
+	var d uint32
+	err = regCreateKeyEx(syscall.Handle(k), p,
+		0, nil, _REG_OPTION_NON_VOLATILE, access, nil, &h, &d)
+	if err != nil {
+		return 0, false, err
+	}
+	// d is the disposition reported by regCreateKeyEx.
+	return Key(h), d == _REG_OPENED_EXISTING_KEY, nil
+}
+
+// DeleteKey deletes the subkey path of key k and its values.
+//
+// A path that cannot be converted to UTF-16 (for example one containing
+// a NUL byte) is reported as an error, as OpenKey does, instead of
+// panicking inside the deprecated syscall.StringToUTF16Ptr.
+func DeleteKey(k Key, path string) error {
+	p, err := syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return err
+	}
+	return regDeleteKey(syscall.Handle(k), p)
+}
+
+// A KeyInfo describes the statistics of a key. It is returned by Stat.
+type KeyInfo struct {
+	SubKeyCount     uint32
+	MaxSubKeyLen    uint32 // size of the key's subkey with the longest name, in Unicode characters, not including the terminating zero byte
+	ValueCount      uint32
+	MaxValueNameLen uint32 // size of the key's longest value name, in Unicode characters, not including the terminating zero byte
+	MaxValueLen     uint32 // longest data component among the key's values, in bytes
+	lastWriteTime   syscall.Filetime
+}
+
+// Stat retrieves information about the open key k.
+func (k Key) Stat() (*KeyInfo, error) {
+	var ki KeyInfo
+	err := syscall.RegQueryInfoKey(syscall.Handle(k), nil, nil, nil,
+		&ki.SubKeyCount, &ki.MaxSubKeyLen, nil, &ki.ValueCount,
+		&ki.MaxValueNameLen, &ki.MaxValueLen, nil, &ki.lastWriteTime)
+	if err != nil {
+		return nil, err
+	}
+	return &ki, nil
+}
diff --git a/src/internal/syscall/windows/registry/mksyscall.go b/src/internal/syscall/windows/registry/mksyscall.go
new file mode 100644
index 0000000..320abf7
--- /dev/null
+++ b/src/internal/syscall/windows/registry/mksyscall.go
@@ -0,0 +1,9 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build generate
+
+package registry
+
+//go:generate go run ../../../../syscall/mksyscall_windows.go -output zsyscall_windows.go syscall.go
diff --git a/src/internal/syscall/windows/registry/registry_test.go b/src/internal/syscall/windows/registry/registry_test.go
new file mode 100644
index 0000000..5797162
--- /dev/null
+++ b/src/internal/syscall/windows/registry/registry_test.go
@@ -0,0 +1,672 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+package registry_test
+
+import (
+	"bytes"
+	"crypto/rand"
+	"os"
+	"syscall"
+	"testing"
+	"unsafe"
+
+	"internal/syscall/windows/registry"
+)
+
+// randKeyName returns prefix followed by ten random decimal digits.
+func randKeyName(prefix string) string {
+	const digits = "0123456789"
+	raw := make([]byte, 10)
+	rand.Read(raw)
+	// Map every random byte onto one of the ten digit characters.
+	for i := range raw {
+		raw[i] = digits[raw[i]%byte(len(digits))]
+	}
+	return prefix + string(raw)
+}
+
+// TestReadSubKeyNames enumerates the subkeys of HKEY_CLASSES_ROOT\TypeLib
+// and fails unless the stdole type library GUID is among them.
+func TestReadSubKeyNames(t *testing.T) {
+	typeLib, err := registry.OpenKey(registry.CLASSES_ROOT, "TypeLib", registry.ENUMERATE_SUB_KEYS)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer typeLib.Close()
+
+	subkeys, err := typeLib.ReadSubKeyNames()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Every PC has "stdole 2.0 OLE Automation" library installed.
+	const stdOleGUID = "{00020430-0000-0000-C000-000000000046}"
+	for _, subkey := range subkeys {
+		if subkey == stdOleGUID {
+			return
+		}
+	}
+	t.Fatal("could not find stdole 2.0 OLE Automation")
+}
+
+// TestCreateOpenDeleteKey creates a randomly named key under
+// HKEY_CURRENT_USER\Software, checks that a second CreateKey reports it
+// as existing, opens it, deletes it, and finally verifies that opening
+// it again fails with ErrNotExist.
+func TestCreateOpenDeleteKey(t *testing.T) {
+	k, err := registry.OpenKey(registry.CURRENT_USER, "Software", registry.QUERY_VALUE)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer k.Close()
+
+	testKName := randKeyName("TestCreateOpenDeleteKey_")
+
+	// First creation: the key must be new.
+	testK, exist, err := registry.CreateKey(k, testKName, registry.CREATE_SUB_KEY)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer testK.Close()
+
+	if exist {
+		t.Fatalf("key %q already exists", testKName)
+	}
+
+	// Second creation: the same name must now be reported as existing.
+	testKAgain, exist, err := registry.CreateKey(k, testKName, registry.CREATE_SUB_KEY)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer testKAgain.Close()
+
+	if !exist {
+		t.Fatalf("key %q should already exist", testKName)
+	}
+
+	testKOpened, err := registry.OpenKey(k, testKName, registry.ENUMERATE_SUB_KEYS)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer testKOpened.Close()
+
+	err = registry.DeleteKey(k, testKName)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// After deletion, opening the key must fail with ErrNotExist.
+	testKOpenedAgain, err := registry.OpenKey(k, testKName, registry.ENUMERATE_SUB_KEYS)
+	if err == nil {
+		defer testKOpenedAgain.Close()
+		t.Fatalf("key %q should have been deleted already", testKName)
+	}
+	if err != registry.ErrNotExist {
+		t.Fatalf(`unexpected error ("not exist" expected): %v`, err)
+	}
+}
+
+// equalStringSlice reports whether a and b have the same length and
+// the same elements in the same order. A nil slice equals an empty one.
+func equalStringSlice(a, b []string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i, s := range a {
+		if s != b[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// ValueTest describes one registry value used by the tests: the registry
+// type to store it as, its name, the Go value to write, and whether
+// writing it is expected to fail.
+type ValueTest struct {
+	Type     uint32
+	Name     string
+	Value    interface{}
+	WillFail bool
+}
+
+// ValueTests is the table of values that setValues writes and the other
+// helpers read back. Entries marked WillFail contain an embedded NUL and
+// are expected to be rejected on write. DWORD and QWORD values are both
+// stored here as uint64; setValues narrows DWORD values to uint32.
+// testStat hard-codes counts and lengths derived from this table, so keep
+// the two in sync when adding entries.
+var ValueTests = []ValueTest{
+	{Type: registry.SZ, Name: "String1", Value: ""},
+	{Type: registry.SZ, Name: "String2", Value: "\000", WillFail: true},
+	{Type: registry.SZ, Name: "String3", Value: "Hello World"},
+	{Type: registry.SZ, Name: "String4", Value: "Hello World\000", WillFail: true},
+	{Type: registry.EXPAND_SZ, Name: "ExpString1", Value: ""},
+	{Type: registry.EXPAND_SZ, Name: "ExpString2", Value: "\000", WillFail: true},
+	{Type: registry.EXPAND_SZ, Name: "ExpString3", Value: "Hello World"},
+	{Type: registry.EXPAND_SZ, Name: "ExpString4", Value: "Hello\000World", WillFail: true},
+	{Type: registry.EXPAND_SZ, Name: "ExpString5", Value: "%PATH%"},
+	{Type: registry.EXPAND_SZ, Name: "ExpString6", Value: "%NO_SUCH_VARIABLE%"},
+	{Type: registry.EXPAND_SZ, Name: "ExpString7", Value: "%PATH%;."},
+	{Type: registry.BINARY, Name: "Binary1", Value: []byte{}},
+	{Type: registry.BINARY, Name: "Binary2", Value: []byte{1, 2, 3}},
+	{Type: registry.BINARY, Name: "Binary3", Value: []byte{3, 2, 1, 0, 1, 2, 3}},
+	{Type: registry.DWORD, Name: "Dword1", Value: uint64(0)},
+	{Type: registry.DWORD, Name: "Dword2", Value: uint64(1)},
+	{Type: registry.DWORD, Name: "Dword3", Value: uint64(0xff)},
+	{Type: registry.DWORD, Name: "Dword4", Value: uint64(0xffff)},
+	{Type: registry.QWORD, Name: "Qword1", Value: uint64(0)},
+	{Type: registry.QWORD, Name: "Qword2", Value: uint64(1)},
+	{Type: registry.QWORD, Name: "Qword3", Value: uint64(0xff)},
+	{Type: registry.QWORD, Name: "Qword4", Value: uint64(0xffff)},
+	{Type: registry.QWORD, Name: "Qword5", Value: uint64(0xffffff)},
+	{Type: registry.QWORD, Name: "Qword6", Value: uint64(0xffffffff)},
+	{Type: registry.MULTI_SZ, Name: "MultiString1", Value: []string{"a", "b", "c"}},
+	{Type: registry.MULTI_SZ, Name: "MultiString2", Value: []string{"abc", "", "cba"}},
+	{Type: registry.MULTI_SZ, Name: "MultiString3", Value: []string{""}},
+	{Type: registry.MULTI_SZ, Name: "MultiString4", Value: []string{"abcdef"}},
+	{Type: registry.MULTI_SZ, Name: "MultiString5", Value: []string{"\000"}, WillFail: true},
+	{Type: registry.MULTI_SZ, Name: "MultiString6", Value: []string{"a\000b"}, WillFail: true},
+	{Type: registry.MULTI_SZ, Name: "MultiString7", Value: []string{"ab", "\000", "cd"}, WillFail: true},
+	{Type: registry.MULTI_SZ, Name: "MultiString8", Value: []string{"\000", "cd"}, WillFail: true},
+	{Type: registry.MULTI_SZ, Name: "MultiString9", Value: []string{"ab", "\000"}, WillFail: true},
+}
+
+// setValues writes every ValueTests entry to k using the setter that
+// matches its type. It aborts the test when a write fails unexpectedly
+// or when a write that should fail succeeds.
+func setValues(t *testing.T, k registry.Key) {
+	for _, vt := range ValueTests {
+		var err error
+		switch vt.Type {
+		case registry.SZ:
+			err = k.SetStringValue(vt.Name, vt.Value.(string))
+		case registry.EXPAND_SZ:
+			err = k.SetExpandStringValue(vt.Name, vt.Value.(string))
+		case registry.MULTI_SZ:
+			err = k.SetStringsValue(vt.Name, vt.Value.([]string))
+		case registry.BINARY:
+			err = k.SetBinaryValue(vt.Name, vt.Value.([]byte))
+		case registry.DWORD:
+			err = k.SetDWordValue(vt.Name, uint32(vt.Value.(uint64)))
+		case registry.QWORD:
+			err = k.SetQWordValue(vt.Name, vt.Value.(uint64))
+		default:
+			t.Fatalf("unsupported type %d for %s value", vt.Type, vt.Name)
+		}
+		switch {
+		case vt.WillFail && err == nil:
+			t.Fatalf("setting %s value %q should fail, but succeeded", vt.Name, vt.Value)
+		case !vt.WillFail && err != nil:
+			t.Fatal(err)
+		}
+	}
+}
+
+// enumerateValues lists the value names of k and checks them against
+// ValueTests: every entry not marked WillFail must be present, every
+// WillFail entry must be absent, and no unknown names may remain.
+func enumerateValues(t *testing.T, k registry.Key) {
+	names, err := k.ReadValueNames()
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	remaining := make(map[string]bool, len(names))
+	for _, name := range names {
+		remaining[name] = false
+	}
+	for _, vt := range ValueTests {
+		_, found := remaining[vt.Name]
+		switch {
+		case !found && !vt.WillFail:
+			t.Errorf("value %s is not found while enumerating", vt.Name)
+		case found && vt.WillFail:
+			t.Errorf("value %s is found while enumerating, but expected to fail", vt.Name)
+		}
+		// Deleting an absent name is a no-op, so no guard is needed.
+		delete(remaining, vt.Name)
+	}
+	for name, v := range remaining {
+		t.Errorf("value %s (%v) is found while enumerating, but has not been created", name, v)
+	}
+}
+
+// testErrNotExist reports a test error unless err is exactly
+// registry.ErrNotExist; name is only used in the failure messages.
+func testErrNotExist(t *testing.T, name string, err error) {
+	switch {
+	case err == nil:
+		t.Errorf("%s value should not exist", name)
+	case err != registry.ErrNotExist:
+		t.Errorf("reading %s value should return 'not exist' error, but got: %s", name, err)
+	}
+}
+
+// testErrUnexpectedType reports a test error unless err is exactly
+// registry.ErrUnexpectedType and gottype matches the type recorded in
+// test. Only the first failing condition is reported.
+func testErrUnexpectedType(t *testing.T, test ValueTest, gottype uint32, err error) {
+	switch {
+	case err == nil:
+		t.Errorf("GetXValue(%q) should not succeed", test.Name)
+	case err != registry.ErrUnexpectedType:
+		t.Errorf("reading %s value should return 'unexpected key value type' error, but got: %s", test.Name, err)
+	case gottype != test.Type:
+		t.Errorf("want %s value type %v, got %v", test.Name, test.Type, gottype)
+	}
+}
+
+// testGetStringValue reads test.Name from k with GetStringValue and
+// checks the returned string and type against test. For EXPAND_SZ values
+// it additionally checks that ExpandString accepts the string.
+func testGetStringValue(t *testing.T, k registry.Key, test ValueTest) {
+	val, typ, err := k.GetStringValue(test.Name)
+	switch {
+	case err != nil:
+		t.Errorf("GetStringValue(%s) failed: %v", test.Name, err)
+		return
+	case val != test.Value:
+		t.Errorf("want %s value %q, got %q", test.Name, test.Value, val)
+		return
+	case typ != test.Type:
+		t.Errorf("want %s value type %v, got %v", test.Name, test.Type, typ)
+		return
+	}
+	if typ == registry.EXPAND_SZ {
+		if _, err := registry.ExpandString(val); err != nil {
+			t.Errorf("ExpandString(%s) failed: %v", val, err)
+		}
+	}
+}
+
+// testGetIntegerValue reads test.Name from k with GetIntegerValue and
+// checks the returned number and type against test. Only the first
+// failing condition is reported.
+func testGetIntegerValue(t *testing.T, k registry.Key, test ValueTest) {
+	val, typ, err := k.GetIntegerValue(test.Name)
+	switch {
+	case err != nil:
+		t.Errorf("GetIntegerValue(%s) failed: %v", test.Name, err)
+	case val != test.Value.(uint64):
+		t.Errorf("want %s value %v, got %v", test.Name, test.Value, val)
+	case typ != test.Type:
+		t.Errorf("want %s value type %v, got %v", test.Name, test.Type, typ)
+	}
+}
+
+// testGetBinaryValue reads test.Name from k with GetBinaryValue and
+// checks the returned bytes and type against test. Only the first
+// failing condition is reported.
+func testGetBinaryValue(t *testing.T, k registry.Key, test ValueTest) {
+	val, typ, err := k.GetBinaryValue(test.Name)
+	switch {
+	case err != nil:
+		t.Errorf("GetBinaryValue(%s) failed: %v", test.Name, err)
+	case !bytes.Equal(val, test.Value.([]byte)):
+		t.Errorf("want %s value %v, got %v", test.Name, test.Value, val)
+	case typ != test.Type:
+		t.Errorf("want %s value type %v, got %v", test.Name, test.Type, typ)
+	}
+}
+
+// testGetStringsValue reads test.Name from k with GetStringsValue and
+// checks the returned strings and type against test. Only the first
+// failing condition is reported.
+func testGetStringsValue(t *testing.T, k registry.Key, test ValueTest) {
+	val, typ, err := k.GetStringsValue(test.Name)
+	switch {
+	case err != nil:
+		t.Errorf("GetStringsValue(%s) failed: %v", test.Name, err)
+	case !equalStringSlice(val, test.Value.([]string)):
+		t.Errorf("want %s value %#v, got %#v", test.Name, test.Value, val)
+	case typ != test.Type:
+		t.Errorf("want %s value type %v, got %v", test.Name, test.Type, typ)
+	}
+}
+
+// testGetValue exercises the low-level GetValue on test.Name, whose data
+// is expected to occupy size bytes. It checks, in order: a read with no
+// buffer, a read with a buffer one byte too short (must fail with
+// ErrShortBuffer yet still report size and type), a read with an exact
+// buffer, and a read of a missing name (must fail with ErrNotExist).
+// Nothing is checked when size is not positive.
+func testGetValue(t *testing.T, k registry.Key, test ValueTest, size int) {
+	if size <= 0 {
+		return
+	}
+	// sizeAndTypeOK reports the first mismatch between what GetValue
+	// returned and what is expected, and tells whether both matched.
+	sizeAndTypeOK := func(gotsize int, gottype uint32) bool {
+		if gotsize != size {
+			t.Errorf("want %s value size of %d, got %v", test.Name, size, gotsize)
+			return false
+		}
+		if gottype != test.Type {
+			t.Errorf("want %s value type %v, got %v", test.Name, test.Type, gottype)
+			return false
+		}
+		return true
+	}
+
+	// read data with no buffer
+	gotsize, gottype, err := k.GetValue(test.Name, nil)
+	if err != nil {
+		t.Errorf("GetValue(%s, [%d]byte) failed: %v", test.Name, size, err)
+		return
+	}
+	if !sizeAndTypeOK(gotsize, gottype) {
+		return
+	}
+
+	// read data with short buffer
+	gotsize, gottype, err = k.GetValue(test.Name, make([]byte, size-1))
+	if err == nil {
+		t.Errorf("GetValue(%s, [%d]byte) should fail, but succeeded", test.Name, size-1)
+		return
+	}
+	if err != registry.ErrShortBuffer {
+		t.Errorf("reading %s value should return 'short buffer' error, but got: %s", test.Name, err)
+		return
+	}
+	if !sizeAndTypeOK(gotsize, gottype) {
+		return
+	}
+
+	// read full data
+	gotsize, gottype, err = k.GetValue(test.Name, make([]byte, size))
+	if err != nil {
+		t.Errorf("GetValue(%s, [%d]byte) failed: %v", test.Name, size, err)
+		return
+	}
+	if !sizeAndTypeOK(gotsize, gottype) {
+		return
+	}
+
+	// check GetValue returns ErrNotExist as required; report the name
+	// that was actually queried, not the existing value's name.
+	missing := test.Name + "_not_there"
+	_, _, err = k.GetValue(missing, make([]byte, size))
+	if err == nil {
+		t.Errorf("GetValue(%q) should not succeed", missing)
+		return
+	}
+	if err != registry.ErrNotExist {
+		t.Errorf("GetValue(%q) should return 'not exist' error, but got: %s", missing, err)
+		return
+	}
+}
+
+// testValues reads back every ValueTests entry from k. For each entry it
+// uses the getter matching the entry's type, then checks that a getter of
+// a different kind fails with ErrUnexpectedType, that a name that was
+// never created fails with ErrNotExist, and that the low-level GetValue
+// reports the expected byte size. Entries marked WillFail were never
+// written, so for those only ErrNotExist is expected.
+func testValues(t *testing.T, k registry.Key) {
+	for _, test := range ValueTests {
+		switch test.Type {
+		case registry.SZ, registry.EXPAND_SZ:
+			if test.WillFail {
+				_, _, err := k.GetStringValue(test.Name)
+				testErrNotExist(t, test.Name, err)
+			} else {
+				testGetStringValue(t, k, test)
+				_, gottype, err := k.GetIntegerValue(test.Name)
+				testErrUnexpectedType(t, test, gottype, err)
+				// Size of utf16 string in bytes is not perfect,
+				// but correct for current test values.
+				// Size also includes terminating 0.
+				testGetValue(t, k, test, (len(test.Value.(string))+1)*2)
+			}
+			_, _, err := k.GetStringValue(test.Name + "_string_not_created")
+			testErrNotExist(t, test.Name+"_string_not_created", err)
+		case registry.DWORD, registry.QWORD:
+			testGetIntegerValue(t, k, test)
+			_, gottype, err := k.GetBinaryValue(test.Name)
+			testErrUnexpectedType(t, test, gottype, err)
+			_, _, err = k.GetIntegerValue(test.Name + "_int_not_created")
+			testErrNotExist(t, test.Name+"_int_not_created", err)
+			// QWORD data is 8 bytes, DWORD data is 4 bytes.
+			size := 8
+			if test.Type == registry.DWORD {
+				size = 4
+			}
+			testGetValue(t, k, test, size)
+		case registry.BINARY:
+			testGetBinaryValue(t, k, test)
+			_, gottype, err := k.GetStringsValue(test.Name)
+			testErrUnexpectedType(t, test, gottype, err)
+			_, _, err = k.GetBinaryValue(test.Name + "_byte_not_created")
+			testErrNotExist(t, test.Name+"_byte_not_created", err)
+			// An empty binary value yields size 0, which testGetValue skips.
+			testGetValue(t, k, test, len(test.Value.([]byte)))
+		case registry.MULTI_SZ:
+			if test.WillFail {
+				_, _, err := k.GetStringsValue(test.Name)
+				testErrNotExist(t, test.Name, err)
+			} else {
+				testGetStringsValue(t, k, test)
+				_, gottype, err := k.GetStringValue(test.Name)
+				testErrUnexpectedType(t, test, gottype, err)
+				// Like the string case above, this counts Go bytes per
+				// string, which matches UTF-16 units for the current
+				// test values.
+				size := 0
+				for _, s := range test.Value.([]string) {
+					size += len(s) + 1 // nil terminated
+				}
+				size += 1 // extra nil at the end
+				size *= 2 // count bytes, not uint16
+				testGetValue(t, k, test, size)
+			}
+			_, _, err := k.GetStringsValue(test.Name + "_strings_not_created")
+			testErrNotExist(t, test.Name+"_strings_not_created", err)
+		default:
+			t.Errorf("unsupported type %d for %s value", test.Type, test.Name)
+			continue
+		}
+	}
+}
+
+// testStat creates a subkey named "subkey" under k and checks that k.Stat
+// reports the expected counts and maximum lengths. The subkey is deleted
+// again when the function returns. The expected value numbers assume that
+// k holds exactly the ValueTests entries not marked WillFail.
+func testStat(t *testing.T, k registry.Key) {
+	subk, _, err := registry.CreateKey(k, "subkey", registry.CREATE_SUB_KEY)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	defer subk.Close()
+
+	defer registry.DeleteKey(k, "subkey")
+
+	ki, err := k.Stat()
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	if ki.SubKeyCount != 1 {
+		t.Error("key must have 1 subkey")
+	}
+	if ki.MaxSubKeyLen != 6 {
+		t.Error("key max subkey name length must be 6")
+	}
+	if ki.ValueCount != 24 {
+		t.Errorf("key must have 24 values, but is %d", ki.ValueCount)
+	}
+	// The message now states the same limit (12) that the condition checks.
+	if ki.MaxValueNameLen != 12 {
+		t.Errorf("key max value name length must be 12, but is %d", ki.MaxValueNameLen)
+	}
+	if ki.MaxValueLen != 38 {
+		t.Errorf("key max value length must be 38, but is %d", ki.MaxValueLen)
+	}
+}
+
+// deleteValues removes every ValueTests entry that was expected to be
+// written (those not marked WillFail) and then checks that k has no
+// values left.
+func deleteValues(t *testing.T, k registry.Key) {
+	for _, vt := range ValueTests {
+		if vt.WillFail {
+			continue
+		}
+		if err := k.DeleteValue(vt.Name); err != nil {
+			t.Error(err)
+		}
+	}
+	left, err := k.ReadValueNames()
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	if len(left) != 0 {
+		t.Errorf("some values remain after deletion: %v", left)
+	}
+}
+
+func TestValues(t *testing.T) {
+	softwareK, err := registry.OpenKey(registry.CURRENT_USER, "Software", registry.QUERY_VALUE)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer softwareK.Close()
+
+	testKName := randKeyName("TestValues_")
+
+	k, exist, err := registry.CreateKey(softwareK, testKName, registry.CREATE_SUB_KEY|registry.QUERY_VALUE|registry.SET_VALUE)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer k.Close()
+
+	if exist {
+		t.Fatalf("key %q already exists", testKName)
+	}
+
+	defer registry.DeleteKey(softwareK, testKName)
+
+	setValues(t, k)
+
+	enumerateValues(t, k)
+
+	testValues(t, k)
+
+	testStat(t, k)
+
+	deleteValues(t, k)
+}
+
+// TestExpandString checks that expanding "%PATH%" yields the PATH
+// environment variable of the current process.
+func TestExpandString(t *testing.T) {
+	want := os.Getenv("PATH")
+	got, err := registry.ExpandString("%PATH%")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if want != got {
+		t.Errorf("want %q string expanded, got %q", want, got)
+	}
+}
+
+// TestInvalidValues stores raw data whose length or layout does not match
+// its declared registry type and checks how the typed getters react:
+// GetIntegerValue must fail, and GetStringsValue must either fail or
+// return no strings. The test key is deleted when the test returns.
+func TestInvalidValues(t *testing.T) {
+	softwareK, err := registry.OpenKey(registry.CURRENT_USER, "Software", registry.QUERY_VALUE)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer softwareK.Close()
+
+	testKName := randKeyName("TestInvalidValues_")
+
+	const access = registry.CREATE_SUB_KEY | registry.QUERY_VALUE | registry.SET_VALUE
+	k, exist, err := registry.CreateKey(softwareK, testKName, access)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer k.Close()
+
+	if exist {
+		t.Fatalf("key %q already exists", testKName)
+	}
+
+	defer registry.DeleteKey(softwareK, testKName)
+
+	cases := []struct {
+		Type uint32
+		Name string
+		Data []byte
+	}{
+		{registry.DWORD, "Dword1", nil},
+		{registry.DWORD, "Dword2", []byte{1, 2, 3}},
+		{registry.QWORD, "Qword1", nil},
+		{registry.QWORD, "Qword2", []byte{1, 2, 3}},
+		{registry.QWORD, "Qword3", []byte{1, 2, 3, 4, 5, 6, 7}},
+		{registry.MULTI_SZ, "MultiString1", nil},
+		{registry.MULTI_SZ, "MultiString2", []byte{0}},
+		{registry.MULTI_SZ, "MultiString3", []byte{'a', 'b', 0}},
+		{registry.MULTI_SZ, "MultiString4", []byte{'a', 0, 0, 'b', 0}},
+		{registry.MULTI_SZ, "MultiString5", []byte{'a', 0, 0}},
+	}
+
+	// Write everything first; the raw SetValue is expected to accept it.
+	for _, tc := range cases {
+		if err := k.SetValue(tc.Name, tc.Type, tc.Data); err != nil {
+			t.Fatalf("SetValue for %q failed: %v", tc.Name, err)
+		}
+	}
+
+	for _, tc := range cases {
+		switch tc.Type {
+		case registry.DWORD, registry.QWORD:
+			value, valType, err := k.GetIntegerValue(tc.Name)
+			if err == nil {
+				t.Errorf("GetIntegerValue(%q) succeeded. Returns type=%d value=%v", tc.Name, valType, value)
+			}
+		case registry.MULTI_SZ:
+			value, valType, err := k.GetStringsValue(tc.Name)
+			if err == nil && len(value) != 0 {
+				t.Errorf("GetStringsValue(%q) succeeded. Returns type=%d value=%v", tc.Name, valType, value)
+			}
+		default:
+			t.Errorf("unsupported type %d for %s value", tc.Type, tc.Name)
+		}
+	}
+}
+
+// TestGetMUIStringValue compares the MUI_Std (and, unless dynamic
+// daylight time is disabled, MUI_Dlt) strings of the current time zone's
+// registry key with the names reported by GetDynamicTimeZoneInformation.
+// It is skipped when either required procedure cannot be found.
+func TestGetMUIStringValue(t *testing.T) {
+	if err := registry.LoadRegLoadMUIString(); err != nil {
+		t.Skip("regLoadMUIString not supported; skipping")
+	}
+	if err := procGetDynamicTimeZoneInformation.Find(); err != nil {
+		t.Skipf("%s not supported; skipping", procGetDynamicTimeZoneInformation.Name)
+	}
+	var dtzi DynamicTimezoneinformation
+	if _, err := GetDynamicTimeZoneInformation(&dtzi); err != nil {
+		t.Fatal(err)
+	}
+	keyPath := `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\` +
+		syscall.UTF16ToString(dtzi.TimeZoneKeyName[:])
+	timezoneK, err := registry.OpenKey(registry.LOCAL_MACHINE, keyPath, registry.READ)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer timezoneK.Close()
+
+	type testType struct {
+		name string
+		want string
+	}
+	tests := []testType{
+		{"MUI_Std", syscall.UTF16ToString(dtzi.StandardName[:])},
+	}
+	if dtzi.DynamicDaylightTimeDisabled == 0 {
+		tests = append(tests, testType{"MUI_Dlt", syscall.UTF16ToString(dtzi.DaylightName[:])})
+	}
+
+	for _, tc := range tests {
+		got, err := timezoneK.GetMUIStringValue(tc.name)
+		if err != nil {
+			t.Error("GetMUIStringValue:", err)
+		}
+
+		if got != tc.want {
+			t.Errorf("GetMUIStringValue: %s: Got %q, want %q", tc.name, got, tc.want)
+		}
+	}
+}
+
+// DynamicTimezoneinformation is the buffer that
+// GetDynamicTimeZoneInformation hands to the kernel32 procedure of the
+// same name through an unsafe pointer, so the field order and sizes must
+// not be changed.
+// NOTE(review): presumably mirrors the Win32 DYNAMIC_TIME_ZONE_INFORMATION
+// layout; confirm against the Windows SDK before editing.
+type DynamicTimezoneinformation struct {
+	Bias                        int32
+	StandardName                [32]uint16
+	StandardDate                syscall.Systemtime
+	StandardBias                int32
+	DaylightName                [32]uint16
+	DaylightDate                syscall.Systemtime
+	DaylightBias                int32
+	TimeZoneKeyName             [128]uint16
+	DynamicDaylightTimeDisabled uint8
+}
+
+// Lazily loaded kernel32 procedure used only by the tests in this file.
+// TestGetMUIStringValue calls Find on the procedure first and skips
+// itself when the procedure is unavailable.
+var (
+	kernel32DLL = syscall.NewLazyDLL("kernel32")
+
+	procGetDynamicTimeZoneInformation = kernel32DLL.NewProc("GetDynamicTimeZoneInformation")
+)
+
+// GetDynamicTimeZoneInformation calls the kernel32 procedure of the same
+// name, filling dtzi, and returns the procedure's result as rc. When rc
+// is 0xffffffff an error is returned: the syscall's error number if it
+// is non-zero, otherwise syscall.EINVAL.
+func GetDynamicTimeZoneInformation(dtzi *DynamicTimezoneinformation) (rc uint32, err error) {
+	r0, _, e1 := syscall.Syscall(procGetDynamicTimeZoneInformation.Addr(), 1, uintptr(unsafe.Pointer(dtzi)), 0, 0)
+	rc = uint32(r0)
+	switch {
+	case rc != 0xffffffff:
+		// Success: err stays nil.
+	case e1 != 0:
+		err = error(e1)
+	default:
+		err = syscall.EINVAL
+	}
+	return rc, err
+}
diff --git a/src/internal/syscall/windows/registry/syscall.go b/src/internal/syscall/windows/registry/syscall.go
new file mode 100644
index 0000000..a6525da
--- /dev/null
+++ b/src/internal/syscall/windows/registry/syscall.go
@@ -0,0 +1,31 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+package registry
+
+import "syscall"
+
+const (
+	// _REG_OPTION_NON_VOLATILE is passed as the options argument of
+	// regCreateKeyEx by CreateKey.
+	_REG_OPTION_NON_VOLATILE = 0
+
+	// Disposition values written by regCreateKeyEx; CreateKey compares
+	// the result with _REG_OPENED_EXISTING_KEY.
+	_REG_CREATED_NEW_KEY     = 1
+	_REG_OPENED_EXISTING_KEY = 2
+
+	// _ERROR_NO_MORE_ITEMS is the error that ends value enumeration in
+	// ReadValueNames.
+	_ERROR_NO_MORE_ITEMS syscall.Errno = 259
+)
+
+// LoadRegLoadMUIString looks up the RegLoadMUIStringW procedure and
+// returns the lookup error, if any. A nil result means GetMUIStringValue
+// can be called.
+func LoadRegLoadMUIString() error {
+	return procRegLoadMUIStringW.Find()
+}
+
+//sys	regCreateKeyEx(key syscall.Handle, subkey *uint16, reserved uint32, class *uint16, options uint32, desired uint32, sa *syscall.SecurityAttributes, result *syscall.Handle, disposition *uint32) (regerrno error) = advapi32.RegCreateKeyExW
+//sys	regDeleteKey(key syscall.Handle, subkey *uint16) (regerrno error) = advapi32.RegDeleteKeyW
+//sys	regSetValueEx(key syscall.Handle, valueName *uint16, reserved uint32, vtype uint32, buf *byte, bufsize uint32) (regerrno error) = advapi32.RegSetValueExW
+//sys	regEnumValue(key syscall.Handle, index uint32, name *uint16, nameLen *uint32, reserved *uint32, valtype *uint32, buf *byte, buflen *uint32) (regerrno error) = advapi32.RegEnumValueW
+//sys	regDeleteValue(key syscall.Handle, name *uint16) (regerrno error) = advapi32.RegDeleteValueW
+//sys   regLoadMUIString(key syscall.Handle, name *uint16, buf *uint16, buflen uint32, buflenCopied *uint32, flags uint32, dir *uint16) (regerrno error) = advapi32.RegLoadMUIStringW
+
+//sys	expandEnvironmentStrings(src *uint16, dst *uint16, size uint32) (n uint32, err error) = kernel32.ExpandEnvironmentStringsW
diff --git a/src/internal/syscall/windows/registry/value.go b/src/internal/syscall/windows/registry/value.go
new file mode 100644
index 0000000..dc3930a
--- /dev/null
+++ b/src/internal/syscall/windows/registry/value.go
@@ -0,0 +1,372 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+package registry
+
+import (
+	"errors"
+	"syscall"
+	"unicode/utf16"
+	"unsafe"
+)
+
+const (
+	// Registry value types.
+	// Typed accessors in this package cover SZ and EXPAND_SZ
+	// (GetStringValue), BINARY (GetBinaryValue), DWORD and QWORD
+	// (GetIntegerValue) and MULTI_SZ (GetStringsValue); any type can be
+	// read with the low-level GetValue.
+	NONE                       = 0
+	SZ                         = 1
+	EXPAND_SZ                  = 2
+	BINARY                     = 3
+	DWORD                      = 4
+	DWORD_BIG_ENDIAN           = 5
+	LINK                       = 6
+	MULTI_SZ                   = 7
+	RESOURCE_LIST              = 8
+	FULL_RESOURCE_DESCRIPTOR   = 9
+	RESOURCE_REQUIREMENTS_LIST = 10
+	QWORD                      = 11
+)
+
+var (
+	// ErrShortBuffer is returned when the buffer was too short for the operation.
+	ErrShortBuffer = syscall.ERROR_MORE_DATA
+
+	// ErrNotExist is returned when a registry key or value does not exist.
+	ErrNotExist = syscall.ERROR_FILE_NOT_FOUND
+
+	// ErrUnexpectedType is returned by Get*Value when the value's type was unexpected.
+	ErrUnexpectedType = errors.New("unexpected key value type")
+)
+
+// GetValue retrieves the type and data for the specified value associated
+// with an open key k. It fills up buffer buf and returns the retrieved
+// byte count n. If buf is too small to fit the stored value it returns
+// ErrShortBuffer error along with the required buffer size n.
+// If no buffer is provided, GetValue retrieves no data and returns only
+// the value's type and the buffer size n needed to hold the data.
+// If the value does not exist, the error returned is ErrNotExist.
+//
+// GetValue is a low level function. If value's type is known, use the appropriate
+// Get*Value function instead.
+func (k Key) GetValue(name string, buf []byte) (n int, valtype uint32, err error) {
+	namep, err := syscall.UTF16PtrFromString(name)
+	if err != nil {
+		return 0, 0, err
+	}
+	// A nil data pointer asks the system for size and type only.
+	var data *byte
+	if len(buf) != 0 {
+		data = &buf[0]
+	}
+	size := uint32(len(buf))
+	err = syscall.RegQueryValueEx(syscall.Handle(k), namep, nil, &valtype, data, &size)
+	// size and valtype are passed on even when err is non-nil.
+	return int(size), valtype, err
+}
+
+// getValue reads the value called name into buf, replacing buf with a
+// larger buffer whenever the system reports ERROR_MORE_DATA together with
+// a bigger required size. It returns the filled part of the buffer and
+// the value's type. buf must not be empty.
+func (k Key) getValue(name string, buf []byte) (data []byte, valtype uint32, err error) {
+	namep, err := syscall.UTF16PtrFromString(name)
+	if err != nil {
+		return nil, 0, err
+	}
+	var typ uint32
+	size := uint32(len(buf))
+	for {
+		err = syscall.RegQueryValueEx(syscall.Handle(k), namep, nil, &typ, &buf[0], &size)
+		switch {
+		case err == nil:
+			return buf[:size], typ, nil
+		case err != syscall.ERROR_MORE_DATA, size <= uint32(len(buf)):
+			// Either a real failure, or "more data" without a larger
+			// size to retry with: give up.
+			return nil, 0, err
+		}
+		buf = make([]byte, size)
+	}
+}
+
+// GetStringValue retrieves the string value for the specified
+// value name associated with an open key k. It also returns the value's type.
+// If value does not exist, GetStringValue returns ErrNotExist.
+// If value is not SZ or EXPAND_SZ, it will return the correct value
+// type and ErrUnexpectedType.
+func (k Key) GetStringValue(name string) (val string, valtype uint32, err error) {
+	data, typ, err2 := k.getValue(name, make([]byte, 64))
+	if err2 != nil {
+		return "", typ, err2
+	}
+	if typ != SZ && typ != EXPAND_SZ {
+		return "", typ, ErrUnexpectedType
+	}
+	if len(data) == 0 {
+		return "", typ, nil
+	}
+	// Reinterpret the raw bytes as UTF-16 code units without copying.
+	units := len(data) / 2
+	u := (*[1 << 29]uint16)(unsafe.Pointer(&data[0]))[:units:units]
+	return syscall.UTF16ToString(u), typ, nil
+}
+
+// GetMUIStringValue retrieves the localized string value for
+// the specified value name associated with an open key k.
+// If the value name doesn't exist or the localized string value
+// can't be resolved, GetMUIStringValue returns ErrNotExist.
+// GetMUIStringValue panics if the system doesn't support
+// regLoadMUIString; use LoadRegLoadMUIString to check if
+// regLoadMUIString is supported before calling this function.
+func (k Key) GetMUIStringValue(name string) (string, error) {
+	pname, err := syscall.UTF16PtrFromString(name)
+	if err != nil {
+		return "", err
+	}
+
+	buf := make([]uint16, 1024)
+	var buflen uint32
+	var pdir *uint16
+
+	// First attempt: no explicit directory (pdir is still nil).
+	err = regLoadMUIString(syscall.Handle(k), pname, &buf[0], uint32(len(buf)), &buflen, 0, pdir)
+	if err == syscall.ERROR_FILE_NOT_FOUND { // Try fallback path
+
+		// Try to resolve the string value using the system directory as
+		// a DLL search path; this assumes the string value is of the form
+		// @[path]\dllname,-strID but with no path given, e.g. @tzres.dll,-320.
+
+		// This approach works with tzres.dll but may have to be revised
+		// in the future to allow callers to provide custom search paths.
+
+		var s string
+		s, err = ExpandString("%SystemRoot%\\system32\\")
+		if err != nil {
+			return "", err
+		}
+		pdir, err = syscall.UTF16PtrFromString(s)
+		if err != nil {
+			return "", err
+		}
+
+		err = regLoadMUIString(syscall.Handle(k), pname, &buf[0], uint32(len(buf)), &buflen, 0, pdir)
+	}
+
+	// pdir keeps whatever the step above left in it, so retries use the
+	// same directory as the call that reported ERROR_MORE_DATA.
+	for err == syscall.ERROR_MORE_DATA { // Grow buffer if needed
+		if buflen <= uint32(len(buf)) {
+			break // Buffer not growing, assume race; break
+		}
+		buf = make([]uint16, buflen)
+		err = regLoadMUIString(syscall.Handle(k), pname, &buf[0], uint32(len(buf)), &buflen, 0, pdir)
+	}
+
+	// This also returns ERROR_MORE_DATA when the loop above gave up.
+	if err != nil {
+		return "", err
+	}
+
+	return syscall.UTF16ToString(buf), nil
+}
+
+// ExpandString expands environment-variable strings and replaces
+// them with the values defined for the current user.
+// Use ExpandString to expand EXPAND_SZ strings.
+// An empty value is returned unchanged.
+func ExpandString(value string) (string, error) {
+	if value == "" {
+		return "", nil
+	}
+	src, err := syscall.UTF16PtrFromString(value)
+	if err != nil {
+		return "", err
+	}
+	dst := make([]uint16, 100)
+	for {
+		n, err := expandEnvironmentStrings(src, &dst[0], uint32(len(dst)))
+		if err != nil {
+			return "", err
+		}
+		if n > uint32(len(dst)) {
+			// The result did not fit; retry with the reported size.
+			dst = make([]uint16, n)
+			continue
+		}
+		return syscall.UTF16ToString(dst[:n]), nil
+	}
+}
+
+// GetStringsValue retrieves the []string value for the specified
+// value name associated with an open key k. It also returns the value's type.
+// If value does not exist, GetStringsValue returns ErrNotExist.
+// If value is not MULTI_SZ, it will return the correct value
+// type and ErrUnexpectedType.
+//
+// The stored data is read as a sequence of NUL-terminated UTF-16 strings.
+// One trailing NUL is removed first; after that only segments followed
+// by a NUL are returned, so a final segment without its own terminator
+// is not included in the result.
+func (k Key) GetStringsValue(name string) (val []string, valtype uint32, err error) {
+	data, typ, err2 := k.getValue(name, make([]byte, 64))
+	if err2 != nil {
+		return nil, typ, err2
+	}
+	if typ != MULTI_SZ {
+		return nil, typ, ErrUnexpectedType
+	}
+	if len(data) == 0 {
+		return nil, typ, nil
+	}
+	// View the bytes as UTF-16 code units; an odd trailing byte is ignored.
+	p := (*[1 << 29]uint16)(unsafe.Pointer(&data[0]))[: len(data)/2 : len(data)/2]
+	if len(p) == 0 {
+		return nil, typ, nil
+	}
+	if p[len(p)-1] == 0 {
+		p = p[:len(p)-1] // remove terminating null
+	}
+	val = make([]string, 0, 5)
+	from := 0
+	for i, c := range p {
+		if c == 0 {
+			// p[from:i] is one complete string; the next starts after the NUL.
+			val = append(val, string(utf16.Decode(p[from:i])))
+			from = i + 1
+		}
+	}
+	return val, typ, nil
+}
+
+// GetIntegerValue retrieves the integer value for the specified
+// value name associated with an open key k. It also returns the value's type.
+// If value does not exist, GetIntegerValue returns ErrNotExist.
+// If value is not DWORD or QWORD, it will return the correct value
+// type and ErrUnexpectedType.
+// A DWORD or QWORD whose stored data is not exactly 4 or 8 bytes long
+// is reported as an error.
+func (k Key) GetIntegerValue(name string) (val uint64, valtype uint32, err error) {
+	data, typ, err2 := k.getValue(name, make([]byte, 8))
+	if err2 != nil {
+		return 0, typ, err2
+	}
+	if typ == DWORD {
+		if len(data) != 4 {
+			return 0, typ, errors.New("DWORD value is not 4 bytes long")
+		}
+		dw := *(*uint32)(unsafe.Pointer(&data[0]))
+		return uint64(dw), DWORD, nil
+	}
+	if typ == QWORD {
+		if len(data) != 8 {
+			return 0, typ, errors.New("QWORD value is not 8 bytes long")
+		}
+		qw := *(*uint64)(unsafe.Pointer(&data[0]))
+		return qw, QWORD, nil
+	}
+	return 0, typ, ErrUnexpectedType
+}
+
+// GetBinaryValue retrieves the binary value for the specified
+// value name associated with an open key k. It also returns the value's type.
+// If value does not exist, GetBinaryValue returns ErrNotExist.
+// If value is not BINARY, it will return the correct value
+// type and ErrUnexpectedType.
+func (k Key) GetBinaryValue(name string) (val []byte, valtype uint32, err error) {
+	data, typ, err2 := k.getValue(name, make([]byte, 64))
+	switch {
+	case err2 != nil:
+		return nil, typ, err2
+	case typ != BINARY:
+		return nil, typ, ErrUnexpectedType
+	}
+	return data, typ, nil
+}
+
+func (k Key) setValue(name string, valtype uint32, data []byte) error {
+	p, err := syscall.UTF16PtrFromString(name)
+	if err != nil {
+		return err
+	}
+	if len(data) == 0 {
+		return regSetValueEx(syscall.Handle(k), p, 0, valtype, nil, 0)
+	}
+	return regSetValueEx(syscall.Handle(k), p, 0, valtype, &data[0], uint32(len(data)))
+}
+
+// SetDWordValue sets the data and type of a name value
+// under key k to value and DWORD.
+func (k Key) SetDWordValue(name string, value uint32) error {
+	// Store the four bytes of value exactly as they lie in memory.
+	raw := (*[4]byte)(unsafe.Pointer(&value))
+	return k.setValue(name, DWORD, raw[:])
+}
+
+// SetQWordValue sets the data and type of a name value
+// under key k to value and QWORD.
+func (k Key) SetQWordValue(name string, value uint64) error {
+	// Store the eight bytes of value exactly as they lie in memory.
+	raw := (*[8]byte)(unsafe.Pointer(&value))
+	return k.setValue(name, QWORD, raw[:])
+}
+
+// setStringValue converts value to NUL-terminated UTF-16 and stores it
+// under name with the given value type. The conversion error is returned
+// when value cannot be converted.
+func (k Key) setStringValue(name string, valtype uint32, value string) error {
+	units, err := syscall.UTF16FromString(value)
+	if err != nil {
+		return err
+	}
+	// View the UTF-16 units as raw bytes, two per unit, without copying.
+	size := len(units) * 2
+	raw := (*[1 << 29]byte)(unsafe.Pointer(&units[0]))[:size:size]
+	return k.setValue(name, valtype, raw)
+}
+
+// SetStringValue sets the data and type of a name value
+// under key k to value and SZ. The value must not contain a zero byte.
+func (k Key) SetStringValue(name, value string) error {
+	return k.setStringValue(name, SZ, value)
+}
+
+// SetExpandStringValue sets the data and type of a name value
+// under key k to value and EXPAND_SZ. The value must not contain a zero byte.
+func (k Key) SetExpandStringValue(name, value string) error {
+	return k.setStringValue(name, EXPAND_SZ, value)
+}
+
+// SetStringsValue sets the data and type of a name value
+// under key k to value and MULTI_SZ. The value strings
+// must not contain a zero byte.
+func (k Key) SetStringsValue(name string, value []string) error {
+	ss := ""
+	for _, s := range value {
+		for i := 0; i < len(s); i++ {
+			if s[i] == 0 {
+				return errors.New("string cannot have 0 inside")
+			}
+		}
+		ss += s + "\x00"
+	}
+	v := utf16.Encode([]rune(ss + "\x00"))
+	buf := (*[1 << 29]byte)(unsafe.Pointer(&v[0]))[: len(v)*2 : len(v)*2]
+	return k.setValue(name, MULTI_SZ, buf)
+}
+
+// SetBinaryValue sets the data and type of a name value
+// under key k to value and BINARY.
+// The bytes are handed to setValue unchanged; value may be empty.
+func (k Key) SetBinaryValue(name string, value []byte) error {
+	return k.setValue(name, BINARY, value)
+}
+
+// DeleteValue removes a named value from the key k.
+// If name contains a NUL byte, DeleteValue returns an error instead
+// of panicking.
+func (k Key) DeleteValue(name string) error {
+	// UTF16PtrFromString reports an embedded NUL as an error, whereas the
+	// deprecated StringToUTF16Ptr panics on it.
+	pname, err := syscall.UTF16PtrFromString(name)
+	if err != nil {
+		return err
+	}
+	return regDeleteValue(syscall.Handle(k), pname)
+}
+
+// ReadValueNames returns the value names of key k.
+// The name buffer is sized from Stat and doubled whenever the system
+// reports ERROR_MORE_DATA. If enumeration fails with any other error,
+// the names collected so far are returned together with that error.
+func (k Key) ReadValueNames() ([]string, error) {
+	ki, err := k.Stat()
+	if err != nil {
+		return nil, err
+	}
+	names := make([]string, 0, ki.ValueCount)
+	buf := make([]uint16, ki.MaxValueNameLen+1) // extra room for terminating null character
+loopItems:
+	for i := uint32(0); ; i++ {
+		// l is passed by pointer: buffer length in, name length out.
+		l := uint32(len(buf))
+		for {
+			err := regEnumValue(syscall.Handle(k), i, &buf[0], &l, nil, nil, nil, nil)
+			if err == nil {
+				break
+			}
+			if err == syscall.ERROR_MORE_DATA {
+				// Double buffer size and try again.
+				l = uint32(2 * len(buf))
+				buf = make([]uint16, l)
+				continue
+			}
+			if err == _ERROR_NO_MORE_ITEMS {
+				// Index i is past the last value: enumeration is complete.
+				break loopItems
+			}
+			return names, err
+		}
+		names = append(names, syscall.UTF16ToString(buf[:l]))
+	}
+	return names, nil
+}
diff --git a/src/internal/syscall/windows/registry/zsyscall_windows.go b/src/internal/syscall/windows/registry/zsyscall_windows.go
new file mode 100644
index 0000000..cab1319
--- /dev/null
+++ b/src/internal/syscall/windows/registry/zsyscall_windows.go
@@ -0,0 +1,107 @@
+// Code generated by 'go generate'; DO NOT EDIT.
+
+package registry
+
+import (
+	"internal/syscall/windows/sysdll"
+	"syscall"
+	"unsafe"
+)
+
+var _ unsafe.Pointer
+
+// Do the interface allocations only once for common
+// Errno values.
+const (
+	errnoERROR_IO_PENDING = 997
+)
+
+var (
+	errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+	errERROR_EINVAL     error = syscall.EINVAL
+)
+
+// errnoErr returns common boxed Errno values, to prevent
+// allocations at runtime.
+func errnoErr(e syscall.Errno) error {
+	switch e {
+	case 0: // a zero errno is reported as EINVAL, never as success
+		return errERROR_EINVAL
+	case errnoERROR_IO_PENDING:
+		return errERROR_IO_PENDING
+	}
+	// TODO: add more here, after collecting data on the common
+	// error values seen on Windows. (perhaps when running
+	// all.bat?)
+	return e
+}
+
+var (
+	modadvapi32 = syscall.NewLazyDLL(sysdll.Add("advapi32.dll"))
+	modkernel32 = syscall.NewLazyDLL(sysdll.Add("kernel32.dll"))
+
+	procRegCreateKeyExW           = modadvapi32.NewProc("RegCreateKeyExW")
+	procRegDeleteKeyW             = modadvapi32.NewProc("RegDeleteKeyW")
+	procRegDeleteValueW           = modadvapi32.NewProc("RegDeleteValueW")
+	procRegEnumValueW             = modadvapi32.NewProc("RegEnumValueW")
+	procRegLoadMUIStringW         = modadvapi32.NewProc("RegLoadMUIStringW")
+	procRegSetValueExW            = modadvapi32.NewProc("RegSetValueExW")
+	procExpandEnvironmentStringsW = modkernel32.NewProc("ExpandEnvironmentStringsW")
+)
+
+func regCreateKeyEx(key syscall.Handle, subkey *uint16, reserved uint32, class *uint16, options uint32, desired uint32, sa *syscall.SecurityAttributes, result *syscall.Handle, disposition *uint32) (regerrno error) {
+	r0, _, _ := syscall.Syscall9(procRegCreateKeyExW.Addr(), 9, uintptr(key), uintptr(unsafe.Pointer(subkey)), uintptr(reserved), uintptr(unsafe.Pointer(class)), uintptr(options), uintptr(desired), uintptr(unsafe.Pointer(sa)), uintptr(unsafe.Pointer(result)), uintptr(unsafe.Pointer(disposition)))
+	if r0 != 0 { // a nonzero result from RegCreateKeyExW is the error code itself
+		regerrno = syscall.Errno(r0)
+	}
+	return // regerrno stays nil when r0 is zero
+}
+
+func regDeleteKey(key syscall.Handle, subkey *uint16) (regerrno error) {
+	r0, _, _ := syscall.Syscall(procRegDeleteKeyW.Addr(), 2, uintptr(key), uintptr(unsafe.Pointer(subkey)), 0)
+	if r0 != 0 { // a nonzero result from RegDeleteKeyW is the error code itself
+		regerrno = syscall.Errno(r0)
+	}
+	return // regerrno stays nil when r0 is zero
+}
+
+func regDeleteValue(key syscall.Handle, name *uint16) (regerrno error) {
+	r0, _, _ := syscall.Syscall(procRegDeleteValueW.Addr(), 2, uintptr(key), uintptr(unsafe.Pointer(name)), 0)
+	if r0 != 0 { // a nonzero result from RegDeleteValueW is the error code itself
+		regerrno = syscall.Errno(r0)
+	}
+	return // regerrno stays nil when r0 is zero
+}
+
+func regEnumValue(key syscall.Handle, index uint32, name *uint16, nameLen *uint32, reserved *uint32, valtype *uint32, buf *byte, buflen *uint32) (regerrno error) {
+	r0, _, _ := syscall.Syscall9(procRegEnumValueW.Addr(), 8, uintptr(key), uintptr(index), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameLen)), uintptr(unsafe.Pointer(reserved)), uintptr(unsafe.Pointer(valtype)), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(buflen)), 0)
+	if r0 != 0 { // a nonzero result from RegEnumValueW is the error code itself
+		regerrno = syscall.Errno(r0)
+	}
+	return // regerrno stays nil when r0 is zero
+}
+
+func regLoadMUIString(key syscall.Handle, name *uint16, buf *uint16, buflen uint32, buflenCopied *uint32, flags uint32, dir *uint16) (regerrno error) {
+	r0, _, _ := syscall.Syscall9(procRegLoadMUIStringW.Addr(), 7, uintptr(key), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buf)), uintptr(buflen), uintptr(unsafe.Pointer(buflenCopied)), uintptr(flags), uintptr(unsafe.Pointer(dir)), 0, 0)
+	if r0 != 0 { // a nonzero result from RegLoadMUIStringW is the error code itself
+		regerrno = syscall.Errno(r0)
+	}
+	return // regerrno stays nil when r0 is zero
+}
+
+func regSetValueEx(key syscall.Handle, valueName *uint16, reserved uint32, vtype uint32, buf *byte, bufsize uint32) (regerrno error) {
+	r0, _, _ := syscall.Syscall6(procRegSetValueExW.Addr(), 6, uintptr(key), uintptr(unsafe.Pointer(valueName)), uintptr(reserved), uintptr(vtype), uintptr(unsafe.Pointer(buf)), uintptr(bufsize))
+	if r0 != 0 { // a nonzero result from RegSetValueExW is the error code itself
+		regerrno = syscall.Errno(r0)
+	}
+	return // regerrno stays nil when r0 is zero
+}
+
+func expandEnvironmentStrings(src *uint16, dst *uint16, size uint32) (n uint32, err error) {
+	r0, _, e1 := syscall.Syscall(procExpandEnvironmentStringsW.Addr(), 3, uintptr(unsafe.Pointer(src)), uintptr(unsafe.Pointer(dst)), uintptr(size))
+	n = uint32(r0)
+	if n == 0 { // a zero result is treated as failure; e1 is mapped through errnoErr
+		err = errnoErr(e1)
+	}
+	return
+}
diff --git a/src/internal/syscall/windows/reparse_windows.go b/src/internal/syscall/windows/reparse_windows.go
new file mode 100644
index 0000000..6e11139
--- /dev/null
+++ b/src/internal/syscall/windows/reparse_windows.go
@@ -0,0 +1,90 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package windows
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+const (
+	FSCTL_SET_REPARSE_POINT    = 0x000900A4
+	IO_REPARSE_TAG_MOUNT_POINT = 0xA0000003
+
+	SYMLINK_FLAG_RELATIVE = 1
+)
+
+// These structures are described
+// in https://msdn.microsoft.com/en-us/library/cc232007.aspx
+// and https://msdn.microsoft.com/en-us/library/cc232006.aspx.
+
+type REPARSE_DATA_BUFFER struct {
+	ReparseTag        uint32
+	ReparseDataLength uint16
+	Reserved          uint16
+	DUMMYUNIONNAME    byte
+}
+
+// REPARSE_DATA_BUFFER_HEADER is a common part of REPARSE_DATA_BUFFER structure.
+type REPARSE_DATA_BUFFER_HEADER struct {
+	ReparseTag uint32
+	// The size, in bytes, of the reparse data that follows
+	// the common portion of the REPARSE_DATA_BUFFER element.
+	// This value is the length of the data starting at the
+	// SubstituteNameOffset field.
+	ReparseDataLength uint16
+	Reserved          uint16
+}
+
+type SymbolicLinkReparseBuffer struct {
+	// The integer that contains the offset, in bytes,
+	// of the substitute name string in the PathBuffer array,
+	// computed as an offset from byte 0 of PathBuffer. Note that
+	// this offset must be divided by 2 to get the array index.
+	SubstituteNameOffset uint16
+	// The integer that contains the length, in bytes, of the
+	// substitute name string. If this string is null-terminated,
+	// SubstituteNameLength does not include the Unicode null character.
+	SubstituteNameLength uint16
+	// PrintNameOffset is similar to SubstituteNameOffset.
+	PrintNameOffset uint16
+	// PrintNameLength is similar to SubstituteNameLength.
+	PrintNameLength uint16
+	// Flags specifies whether the substitute name is a full path name or
+	// a path name relative to the directory containing the symbolic link.
+	Flags      uint32
+	PathBuffer [1]uint16
+}
+
+// Path decodes the substitute name held in rb's PathBuffer and returns it as a string.
+func (rb *SymbolicLinkReparseBuffer) Path() string {
+	first, last := rb.SubstituteNameOffset/2, (rb.SubstituteNameOffset+rb.SubstituteNameLength)/2 // byte offsets -> uint16 indexes
+	units := (*[0xffff]uint16)(unsafe.Pointer(&rb.PathBuffer[0]))
+	return syscall.UTF16ToString(units[first:last:last])
+}
+
+type MountPointReparseBuffer struct {
+	// The integer that contains the offset, in bytes,
+	// of the substitute name string in the PathBuffer array,
+	// computed as an offset from byte 0 of PathBuffer. Note that
+	// this offset must be divided by 2 to get the array index.
+	SubstituteNameOffset uint16
+	// The integer that contains the length, in bytes, of the
+	// substitute name string. If this string is null-terminated,
+	// SubstituteNameLength does not include the Unicode null character.
+	SubstituteNameLength uint16
+	// PrintNameOffset is similar to SubstituteNameOffset.
+	PrintNameOffset uint16
+	// PrintNameLength is similar to SubstituteNameLength.
+	PrintNameLength uint16
+	PathBuffer      [1]uint16
+}
+
+// Path decodes the substitute name held in rb's PathBuffer and returns it as a string.
+func (rb *MountPointReparseBuffer) Path() string {
+	first, last := rb.SubstituteNameOffset/2, (rb.SubstituteNameOffset+rb.SubstituteNameLength)/2 // byte offsets -> uint16 indexes
+	units := (*[0xffff]uint16)(unsafe.Pointer(&rb.PathBuffer[0]))
+	return syscall.UTF16ToString(units[first:last:last])
+}
diff --git a/src/internal/syscall/windows/security_windows.go b/src/internal/syscall/windows/security_windows.go
new file mode 100644
index 0000000..4a2dfc0
--- /dev/null
+++ b/src/internal/syscall/windows/security_windows.go
@@ -0,0 +1,128 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package windows
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+const (
+	SecurityAnonymous      = 0
+	SecurityIdentification = 1
+	SecurityImpersonation  = 2
+	SecurityDelegation     = 3
+)
+
+//sys	ImpersonateSelf(impersonationlevel uint32) (err error) = advapi32.ImpersonateSelf
+//sys	RevertToSelf() (err error) = advapi32.RevertToSelf
+
+const (
+	TOKEN_ADJUST_PRIVILEGES = 0x0020
+	SE_PRIVILEGE_ENABLED    = 0x00000002
+)
+
+type LUID struct {
+	LowPart  uint32
+	HighPart int32
+}
+
+type LUID_AND_ATTRIBUTES struct {
+	Luid       LUID
+	Attributes uint32
+}
+
+type TOKEN_PRIVILEGES struct {
+	PrivilegeCount uint32
+	Privileges     [1]LUID_AND_ATTRIBUTES
+}
+
+//sys	OpenThreadToken(h syscall.Handle, access uint32, openasself bool, token *syscall.Token) (err error) = advapi32.OpenThreadToken
+//sys	LookupPrivilegeValue(systemname *uint16, name *uint16, luid *LUID) (err error) = advapi32.LookupPrivilegeValueW
+//sys	adjustTokenPrivileges(token syscall.Token, disableAllPrivileges bool, newstate *TOKEN_PRIVILEGES, buflen uint32, prevstate *TOKEN_PRIVILEGES, returnlen *uint32) (ret uint32, err error) [true] = advapi32.AdjustTokenPrivileges
+
+func AdjustTokenPrivileges(token syscall.Token, disableAllPrivileges bool, newstate *TOKEN_PRIVILEGES, buflen uint32, prevstate *TOKEN_PRIVILEGES, returnlen *uint32) error {
+	ret, err := adjustTokenPrivileges(token, disableAllPrivileges, newstate, buflen, prevstate, returnlen)
+	switch {
+	case ret == 0:
+		// The AdjustTokenPrivileges call itself failed.
+		return err
+	case err == syscall.EINVAL:
+		// The call succeeded and GetLastError returned ERROR_SUCCESS (surfaced here as EINVAL).
+		return nil
+	default:
+		return err
+	}
+}
+
+//sys DuplicateTokenEx(hExistingToken syscall.Token, dwDesiredAccess uint32, lpTokenAttributes *syscall.SecurityAttributes, impersonationLevel uint32, tokenType TokenType, phNewToken *syscall.Token) (err error) = advapi32.DuplicateTokenEx
+//sys SetTokenInformation(tokenHandle syscall.Token, tokenInformationClass uint32, tokenInformation uintptr, tokenInformationLength uint32) (err error) = advapi32.SetTokenInformation
+
+type SID_AND_ATTRIBUTES struct {
+	Sid        *syscall.SID
+	Attributes uint32
+}
+
+type TOKEN_MANDATORY_LABEL struct {
+	Label SID_AND_ATTRIBUTES
+}
+
+func (tml *TOKEN_MANDATORY_LABEL) Size() uint32 {
+	return uint32(unsafe.Sizeof(TOKEN_MANDATORY_LABEL{})) + syscall.GetLengthSid(tml.Label.Sid) // struct size plus the length GetLengthSid reports for Label.Sid
+}
+
+const SE_GROUP_INTEGRITY = 0x00000020
+
+type TokenType uint32
+
+const (
+	TokenPrimary       TokenType = 1
+	TokenImpersonation TokenType = 2
+)
+
+//sys	GetProfilesDirectory(dir *uint16, dirLen *uint32) (err error) = userenv.GetProfilesDirectoryW
+
+const (
+	LG_INCLUDE_INDIRECT  = 0x1
+	MAX_PREFERRED_LENGTH = 0xFFFFFFFF
+)
+
+type LocalGroupUserInfo0 struct {
+	Name *uint16
+}
+
+type UserInfo4 struct {
+	Name            *uint16
+	Password        *uint16
+	PasswordAge     uint32
+	Priv            uint32
+	HomeDir         *uint16
+	Comment         *uint16
+	Flags           uint32
+	ScriptPath      *uint16
+	AuthFlags       uint32
+	FullName        *uint16
+	UsrComment      *uint16
+	Parms           *uint16
+	Workstations    *uint16
+	LastLogon       uint32
+	LastLogoff      uint32
+	AcctExpires     uint32
+	MaxStorage      uint32
+	UnitsPerWeek    uint32
+	LogonHours      *byte
+	BadPwCount      uint32
+	NumLogons       uint32
+	LogonServer     *uint16
+	CountryCode     uint32
+	CodePage        uint32
+	UserSid         *syscall.SID
+	PrimaryGroupID  uint32
+	Profile         *uint16
+	HomeDirDrive    *uint16
+	PasswordExpired uint32
+}
+
+//sys	NetUserGetLocalGroups(serverName *uint16, userName *uint16, level uint32, flags uint32, buf **byte, prefMaxLen uint32, entriesRead *uint32, totalEntries *uint32) (neterr error) = netapi32.NetUserGetLocalGroups
diff --git a/src/internal/syscall/windows/symlink_windows.go b/src/internal/syscall/windows/symlink_windows.go
new file mode 100644
index 0000000..b64d058
--- /dev/null
+++ b/src/internal/syscall/windows/symlink_windows.go
@@ -0,0 +1,39 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package windows
+
+import "syscall"
+
+const (
+	ERROR_INVALID_PARAMETER syscall.Errno = 87
+
+	// symlink support for CreateSymbolicLink() starting with Windows 10 (1703, v10.0.14972)
+	SYMBOLIC_LINK_FLAG_ALLOW_UNPRIVILEGED_CREATE = 0x2
+
+	// FileInformationClass values
+	FileBasicInfo                  = 0    // FILE_BASIC_INFO
+	FileStandardInfo               = 1    // FILE_STANDARD_INFO
+	FileNameInfo                   = 2    // FILE_NAME_INFO
+	FileStreamInfo                 = 7    // FILE_STREAM_INFO
+	FileCompressionInfo            = 8    // FILE_COMPRESSION_INFO
+	FileAttributeTagInfo           = 9    // FILE_ATTRIBUTE_TAG_INFO
+	FileIdBothDirectoryInfo        = 0xa  // FILE_ID_BOTH_DIR_INFO
+	FileIdBothDirectoryRestartInfo = 0xb  // FILE_ID_BOTH_DIR_INFO
+	FileRemoteProtocolInfo         = 0xd  // FILE_REMOTE_PROTOCOL_INFO
+	FileFullDirectoryInfo          = 0xe  // FILE_FULL_DIR_INFO
+	FileFullDirectoryRestartInfo   = 0xf  // FILE_FULL_DIR_INFO
+	FileStorageInfo                = 0x10 // FILE_STORAGE_INFO
+	FileAlignmentInfo              = 0x11 // FILE_ALIGNMENT_INFO
+	FileIdInfo                     = 0x12 // FILE_ID_INFO
+	FileIdExtdDirectoryInfo        = 0x13 // FILE_ID_EXTD_DIR_INFO
+	FileIdExtdDirectoryRestartInfo = 0x14 // FILE_ID_EXTD_DIR_INFO
+)
+
+type FILE_ATTRIBUTE_TAG_INFO struct {
+	FileAttributes uint32
+	ReparseTag     uint32
+}
+
+//sys	GetFileInformationByHandleEx(handle syscall.Handle, class uint32, info *byte, bufsize uint32) (err error)
diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go
new file mode 100644
index 0000000..f8965d0
--- /dev/null
+++ b/src/internal/syscall/windows/syscall_windows.go
@@ -0,0 +1,346 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package windows
+
+import (
+	"internal/unsafeheader"
+	"sync"
+	"syscall"
+	"unicode/utf16"
+	"unsafe"
+)
+
+// UTF16PtrToString is the *uint16 counterpart of UTF16ToString:
+// it decodes the NUL-terminated UTF-16 text that p points to.
+func UTF16PtrToString(p *uint16) string {
+	if p == nil {
+		return ""
+	}
+	// Count the code units that precede the NUL terminator.
+	length := 0
+	cur := unsafe.Pointer(p)
+	for *(*uint16)(cur) != 0 {
+		length++
+		cur = unsafe.Pointer(uintptr(cur) + unsafe.Sizeof(*p))
+	}
+	// View those code units as a []uint16 without copying.
+	var units []uint16
+	hdr := (*unsafeheader.Slice)(unsafe.Pointer(&units))
+	hdr.Data = unsafe.Pointer(p)
+	hdr.Cap = length
+	hdr.Len = length
+	// Decode the code units into a Go string.
+	return string(utf16.Decode(units))
+}
+
+const (
+	ERROR_SHARING_VIOLATION      syscall.Errno = 32
+	ERROR_LOCK_VIOLATION         syscall.Errno = 33
+	ERROR_NOT_SUPPORTED          syscall.Errno = 50
+	ERROR_CALL_NOT_IMPLEMENTED   syscall.Errno = 120
+	ERROR_INVALID_NAME           syscall.Errno = 123
+	ERROR_LOCK_FAILED            syscall.Errno = 167
+	ERROR_NO_UNICODE_TRANSLATION syscall.Errno = 1113
+)
+
+const GAA_FLAG_INCLUDE_PREFIX = 0x00000010
+
+const (
+	IF_TYPE_OTHER              = 1
+	IF_TYPE_ETHERNET_CSMACD    = 6
+	IF_TYPE_ISO88025_TOKENRING = 9
+	IF_TYPE_PPP                = 23
+	IF_TYPE_SOFTWARE_LOOPBACK  = 24
+	IF_TYPE_ATM                = 37
+	IF_TYPE_IEEE80211          = 71
+	IF_TYPE_TUNNEL             = 131
+	IF_TYPE_IEEE1394           = 144
+)
+
+type SocketAddress struct {
+	Sockaddr       *syscall.RawSockaddrAny
+	SockaddrLength int32
+}
+
+type IpAdapterUnicastAddress struct {
+	Length             uint32
+	Flags              uint32
+	Next               *IpAdapterUnicastAddress
+	Address            SocketAddress
+	PrefixOrigin       int32
+	SuffixOrigin       int32
+	DadState           int32
+	ValidLifetime      uint32
+	PreferredLifetime  uint32
+	LeaseLifetime      uint32
+	OnLinkPrefixLength uint8
+}
+
+type IpAdapterAnycastAddress struct {
+	Length  uint32
+	Flags   uint32
+	Next    *IpAdapterAnycastAddress
+	Address SocketAddress
+}
+
+type IpAdapterMulticastAddress struct {
+	Length  uint32
+	Flags   uint32
+	Next    *IpAdapterMulticastAddress
+	Address SocketAddress
+}
+
+type IpAdapterDnsServerAdapter struct {
+	Length   uint32
+	Reserved uint32
+	Next     *IpAdapterDnsServerAdapter
+	Address  SocketAddress
+}
+
+type IpAdapterPrefix struct {
+	Length       uint32
+	Flags        uint32
+	Next         *IpAdapterPrefix
+	Address      SocketAddress
+	PrefixLength uint32
+}
+
+type IpAdapterAddresses struct {
+	Length                uint32
+	IfIndex               uint32
+	Next                  *IpAdapterAddresses
+	AdapterName           *byte
+	FirstUnicastAddress   *IpAdapterUnicastAddress
+	FirstAnycastAddress   *IpAdapterAnycastAddress
+	FirstMulticastAddress *IpAdapterMulticastAddress
+	FirstDnsServerAddress *IpAdapterDnsServerAdapter
+	DnsSuffix             *uint16
+	Description           *uint16
+	FriendlyName          *uint16
+	PhysicalAddress       [syscall.MAX_ADAPTER_ADDRESS_LENGTH]byte
+	PhysicalAddressLength uint32
+	Flags                 uint32
+	Mtu                   uint32
+	IfType                uint32
+	OperStatus            uint32
+	Ipv6IfIndex           uint32
+	ZoneIndices           [16]uint32
+	FirstPrefix           *IpAdapterPrefix
+	/* more fields might be present here. */
+}
+
+type FILE_BASIC_INFO struct {
+	CreationTime   syscall.Filetime
+	LastAccessTime syscall.Filetime
+	LastWriteTime  syscall.Filetime
+	ChangedTime    syscall.Filetime
+	FileAttributes uint32
+}
+
+const (
+	IfOperStatusUp             = 1
+	IfOperStatusDown           = 2
+	IfOperStatusTesting        = 3
+	IfOperStatusUnknown        = 4
+	IfOperStatusDormant        = 5
+	IfOperStatusNotPresent     = 6
+	IfOperStatusLowerLayerDown = 7
+)
+
+//sys	GetAdaptersAddresses(family uint32, flags uint32, reserved uintptr, adapterAddresses *IpAdapterAddresses, sizePointer *uint32) (errcode error) = iphlpapi.GetAdaptersAddresses
+//sys	GetComputerNameEx(nameformat uint32, buf *uint16, n *uint32) (err error) = GetComputerNameExW
+//sys	MoveFileEx(from *uint16, to *uint16, flags uint32) (err error) = MoveFileExW
+//sys	GetModuleFileName(module syscall.Handle, fn *uint16, len uint32) (n uint32, err error) = kernel32.GetModuleFileNameW
+//sys	SetFileInformationByHandle(handle syscall.Handle, fileInformationClass uint32, buf uintptr, bufsize uint32) (err error) = kernel32.SetFileInformationByHandle
+
+const (
+	WSA_FLAG_OVERLAPPED        = 0x01
+	WSA_FLAG_NO_HANDLE_INHERIT = 0x80
+
+	WSAEMSGSIZE syscall.Errno = 10040
+
+	MSG_PEEK   = 0x2
+	MSG_TRUNC  = 0x0100
+	MSG_CTRUNC = 0x0200
+
+	socket_error = uintptr(^uint32(0))
+)
+
+var WSAID_WSASENDMSG = syscall.GUID{
+	Data1: 0xa441e712,
+	Data2: 0x754f,
+	Data3: 0x43ca,
+	Data4: [8]byte{0x84, 0xa7, 0x0d, 0xee, 0x44, 0xcf, 0x60, 0x6d},
+}
+
+var WSAID_WSARECVMSG = syscall.GUID{
+	Data1: 0xf689d7c8,
+	Data2: 0x6f1f,
+	Data3: 0x436b,
+	Data4: [8]byte{0x8a, 0x53, 0xe5, 0x4f, 0xe3, 0x51, 0xc3, 0x22},
+}
+
+var sendRecvMsgFunc struct {
+	once     sync.Once
+	sendAddr uintptr
+	recvAddr uintptr
+	err      error
+}
+
+type WSAMsg struct {
+	Name        syscall.Pointer
+	Namelen     int32
+	Buffers     *syscall.WSABuf
+	BufferCount uint32
+	Control     syscall.WSABuf
+	Flags       uint32
+}
+
+//sys	WSASocket(af int32, typ int32, protocol int32, protinfo *syscall.WSAProtocolInfo, group uint32, flags uint32) (handle syscall.Handle, err error) [failretval==syscall.InvalidHandle] = ws2_32.WSASocketW
+
+// loadWSASendRecvMsg looks up the WSARecvMsg and WSASendMsg extension function pointers once and caches the outcome.
+func loadWSASendRecvMsg() error {
+	sendRecvMsgFunc.once.Do(func() {
+		var s syscall.Handle
+		s, sendRecvMsgFunc.err = syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP)
+		if sendRecvMsgFunc.err != nil {
+			return
+		}
+		// s is a Winsock socket, so it is released with Closesocket rather than CloseHandle.
+		defer syscall.Closesocket(s)
+		var n uint32
+		sendRecvMsgFunc.err = syscall.WSAIoctl(s,
+			syscall.SIO_GET_EXTENSION_FUNCTION_POINTER,
+			(*byte)(unsafe.Pointer(&WSAID_WSARECVMSG)),
+			uint32(unsafe.Sizeof(WSAID_WSARECVMSG)),
+			(*byte)(unsafe.Pointer(&sendRecvMsgFunc.recvAddr)),
+			uint32(unsafe.Sizeof(sendRecvMsgFunc.recvAddr)), &n, nil, 0)
+		if sendRecvMsgFunc.err != nil {
+			return
+		}
+		sendRecvMsgFunc.err = syscall.WSAIoctl(s,
+			syscall.SIO_GET_EXTENSION_FUNCTION_POINTER,
+			(*byte)(unsafe.Pointer(&WSAID_WSASENDMSG)),
+			uint32(unsafe.Sizeof(WSAID_WSASENDMSG)),
+			(*byte)(unsafe.Pointer(&sendRecvMsgFunc.sendAddr)),
+			uint32(unsafe.Sizeof(sendRecvMsgFunc.sendAddr)), &n, nil, 0)
+	})
+	return sendRecvMsgFunc.err // the cached error is returned on every call
+}
+
+func WSASendMsg(fd syscall.Handle, msg *WSAMsg, flags uint32, bytesSent *uint32, overlapped *syscall.Overlapped, croutine *byte) error {
+	if err := loadWSASendRecvMsg(); err != nil {
+		return err
+	}
+	r1, _, e1 := syscall.Syscall6(sendRecvMsgFunc.sendAddr, 6, uintptr(fd), uintptr(unsafe.Pointer(msg)), uintptr(flags), uintptr(unsafe.Pointer(bytesSent)), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine)))
+	switch {
+	case r1 != socket_error:
+		return nil
+	case e1 != 0:
+		// The call failed and reported an errno.
+		return errnoErr(e1)
+	default:
+		return syscall.EINVAL
+	}
+}
+
+func WSARecvMsg(fd syscall.Handle, msg *WSAMsg, bytesReceived *uint32, overlapped *syscall.Overlapped, croutine *byte) error {
+	if err := loadWSASendRecvMsg(); err != nil {
+		return err
+	}
+	r1, _, e1 := syscall.Syscall6(sendRecvMsgFunc.recvAddr, 5, uintptr(fd), uintptr(unsafe.Pointer(msg)), uintptr(unsafe.Pointer(bytesReceived)), uintptr(unsafe.Pointer(overlapped)), uintptr(unsafe.Pointer(croutine)), 0)
+	switch {
+	case r1 != socket_error:
+		return nil
+	case e1 != 0:
+		// The call failed and reported an errno.
+		return errnoErr(e1)
+	default:
+		return syscall.EINVAL
+	}
+}
+
+const (
+	ComputerNameNetBIOS                   = 0
+	ComputerNameDnsHostname               = 1
+	ComputerNameDnsDomain                 = 2
+	ComputerNameDnsFullyQualified         = 3
+	ComputerNamePhysicalNetBIOS           = 4
+	ComputerNamePhysicalDnsHostname       = 5
+	ComputerNamePhysicalDnsDomain         = 6
+	ComputerNamePhysicalDnsFullyQualified = 7
+	ComputerNameMax                       = 8
+
+	MOVEFILE_REPLACE_EXISTING      = 0x1
+	MOVEFILE_COPY_ALLOWED          = 0x2
+	MOVEFILE_DELAY_UNTIL_REBOOT    = 0x4
+	MOVEFILE_WRITE_THROUGH         = 0x8
+	MOVEFILE_CREATE_HARDLINK       = 0x10
+	MOVEFILE_FAIL_IF_NOT_TRACKABLE = 0x20
+)
+
+func Rename(oldpath, newpath string) error {
+	src, err := syscall.UTF16PtrFromString(oldpath)
+	if err != nil {
+		return err
+	}
+	dst, err := syscall.UTF16PtrFromString(newpath)
+	if err != nil {
+		return err
+	}
+	return MoveFileEx(src, dst, MOVEFILE_REPLACE_EXISTING) // the only flag passed is MOVEFILE_REPLACE_EXISTING
+}
+
+//sys LockFileEx(file syscall.Handle, flags uint32, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *syscall.Overlapped) (err error) = kernel32.LockFileEx
+//sys UnlockFileEx(file syscall.Handle, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *syscall.Overlapped) (err error) = kernel32.UnlockFileEx
+
+const (
+	LOCKFILE_FAIL_IMMEDIATELY = 0x00000001
+	LOCKFILE_EXCLUSIVE_LOCK   = 0x00000002
+)
+
+const MB_ERR_INVALID_CHARS = 8
+
+//sys	GetACP() (acp uint32) = kernel32.GetACP
+//sys	GetConsoleCP() (ccp uint32) = kernel32.GetConsoleCP
+//sys	MultiByteToWideChar(codePage uint32, dwFlags uint32, str *byte, nstr int32, wchar *uint16, nwchar int32) (nwrite int32, err error) = kernel32.MultiByteToWideChar
+//sys	GetCurrentThread() (pseudoHandle syscall.Handle, err error) = kernel32.GetCurrentThread
+
+const STYPE_DISKTREE = 0x00
+
+type SHARE_INFO_2 struct {
+	Netname     *uint16
+	Type        uint32
+	Remark      *uint16
+	Permissions uint32
+	MaxUses     uint32
+	CurrentUses uint32
+	Path        *uint16
+	Passwd      *uint16
+}
+
+//sys  NetShareAdd(serverName *uint16, level uint32, buf *byte, parmErr *uint16) (neterr error) = netapi32.NetShareAdd
+//sys  NetShareDel(serverName *uint16, netName *uint16, reserved uint32) (neterr error) = netapi32.NetShareDel
+
+const (
+	FILE_NAME_NORMALIZED = 0x0
+	FILE_NAME_OPENED     = 0x8
+
+	VOLUME_NAME_DOS  = 0x0
+	VOLUME_NAME_GUID = 0x1
+	VOLUME_NAME_NONE = 0x4
+	VOLUME_NAME_NT   = 0x2
+)
+
+//sys	GetFinalPathNameByHandle(file syscall.Handle, filePath *uint16, filePathSize uint32, flags uint32) (n uint32, err error) = kernel32.GetFinalPathNameByHandleW
+
+func LoadGetFinalPathNameByHandle() error {
+	return procGetFinalPathNameByHandleW.Find() // NOTE(review): presumably a non-nil error means GetFinalPathNameByHandleW could not be resolved — confirm against LazyProc.Find
+}
+
+//sys	CreateEnvironmentBlock(block **uint16, token syscall.Token, inheritExisting bool) (err error) = userenv.CreateEnvironmentBlock
+//sys	DestroyEnvironmentBlock(block *uint16) (err error) = userenv.DestroyEnvironmentBlock
+
+//sys	RtlGenRandom(buf []byte) (err error) = advapi32.SystemFunction036
diff --git a/src/internal/syscall/windows/sysdll/sysdll.go b/src/internal/syscall/windows/sysdll/sysdll.go
new file mode 100644
index 0000000..c587c19
--- /dev/null
+++ b/src/internal/syscall/windows/sysdll/sysdll.go
@@ -0,0 +1,30 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build windows
+
+// Package sysdll is an internal leaf package that records and reports
+// which Windows DLL names are used by Go itself. These DLLs are then
+// only loaded from the System32 directory. See Issue 14959.
+package sysdll
+
+// IsSystemDLL reports whether the named dll key (a base name, like
+// "foo.dll") is a system DLL which should only be loaded from the
+// Windows SYSTEM32 directory.
+//
+// Filenames are case sensitive, but that doesn't matter because
+// the case registered with Add is also the same case used with
+// LoadDLL later.
+//
+// It has no associated mutex and should only be mutated serially
+// (currently: during init), and not concurrent with DLL loading.
+var IsSystemDLL = map[string]bool{}
+
+// Add records dll in IsSystemDLL as a system32 DLL, one that should
+// only be loaded from the Windows SYSTEM32 directory. It returns its
+// argument unchanged, for ease of use in generated code.
+func Add(dll string) string {
+	IsSystemDLL[dll] = true
+	return dll
+}
diff --git a/src/internal/syscall/windows/zsyscall_windows.go b/src/internal/syscall/windows/zsyscall_windows.go
new file mode 100644
index 0000000..aaad4a5
--- /dev/null
+++ b/src/internal/syscall/windows/zsyscall_windows.go
@@ -0,0 +1,327 @@
+// Code generated by 'go generate'; DO NOT EDIT.
+
+package windows
+
+import (
+	"internal/syscall/windows/sysdll"
+	"syscall"
+	"unsafe"
+)
+
+var _ unsafe.Pointer
+
+// The common Errno values below are converted to the error interface
+// exactly once, so handing them out later needs no new allocation.
+const (
+	errnoERROR_IO_PENDING = 997
+)
+
+var (
+	errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
+	errERROR_EINVAL     error = syscall.EINVAL
+)
+
+// errnoErr maps e onto one of the pre-boxed error values when one
+// exists, to prevent allocations at runtime. A zero Errno is reported
+// as EINVAL; any Errno without a pre-boxed value is returned as is.
+func errnoErr(e syscall.Errno) error {
+	if e == 0 {
+		return errERROR_EINVAL
+	}
+	if e == errnoERROR_IO_PENDING {
+		return errERROR_IO_PENDING
+	}
+	// TODO: add more here, after collecting data on the common
+	// error values seen on Windows. (perhaps when running
+	// all.bat?)
+	return e
+}
+
+// Handles for the DLLs and procedures used by the wrappers in this file.
+// Every DLL name is first registered with sysdll.Add and then wrapped by
+// syscall.NewLazyDLL; each procXxx value is obtained from its DLL with
+// NewProc under the exact exported procedure name.
+var (
+	modadvapi32 = syscall.NewLazyDLL(sysdll.Add("advapi32.dll"))
+	modiphlpapi = syscall.NewLazyDLL(sysdll.Add("iphlpapi.dll"))
+	modkernel32 = syscall.NewLazyDLL(sysdll.Add("kernel32.dll"))
+	modnetapi32 = syscall.NewLazyDLL(sysdll.Add("netapi32.dll"))
+	modpsapi    = syscall.NewLazyDLL(sysdll.Add("psapi.dll"))
+	moduserenv  = syscall.NewLazyDLL(sysdll.Add("userenv.dll"))
+	modws2_32   = syscall.NewLazyDLL(sysdll.Add("ws2_32.dll"))
+
+	procAdjustTokenPrivileges        = modadvapi32.NewProc("AdjustTokenPrivileges")
+	procDuplicateTokenEx             = modadvapi32.NewProc("DuplicateTokenEx")
+	procImpersonateSelf              = modadvapi32.NewProc("ImpersonateSelf")
+	procLookupPrivilegeValueW        = modadvapi32.NewProc("LookupPrivilegeValueW")
+	procOpenThreadToken              = modadvapi32.NewProc("OpenThreadToken")
+	procRevertToSelf                 = modadvapi32.NewProc("RevertToSelf")
+	procSetTokenInformation          = modadvapi32.NewProc("SetTokenInformation")
+	procSystemFunction036            = modadvapi32.NewProc("SystemFunction036")
+	procGetAdaptersAddresses         = modiphlpapi.NewProc("GetAdaptersAddresses")
+	procGetACP                       = modkernel32.NewProc("GetACP")
+	procGetComputerNameExW           = modkernel32.NewProc("GetComputerNameExW")
+	procGetConsoleCP                 = modkernel32.NewProc("GetConsoleCP")
+	procGetCurrentThread             = modkernel32.NewProc("GetCurrentThread")
+	procGetFileInformationByHandleEx = modkernel32.NewProc("GetFileInformationByHandleEx")
+	procGetFinalPathNameByHandleW    = modkernel32.NewProc("GetFinalPathNameByHandleW")
+	procGetModuleFileNameW           = modkernel32.NewProc("GetModuleFileNameW")
+	procLockFileEx                   = modkernel32.NewProc("LockFileEx")
+	procMoveFileExW                  = modkernel32.NewProc("MoveFileExW")
+	procMultiByteToWideChar          = modkernel32.NewProc("MultiByteToWideChar")
+	procSetFileInformationByHandle   = modkernel32.NewProc("SetFileInformationByHandle")
+	procUnlockFileEx                 = modkernel32.NewProc("UnlockFileEx")
+	procNetShareAdd                  = modnetapi32.NewProc("NetShareAdd")
+	procNetShareDel                  = modnetapi32.NewProc("NetShareDel")
+	procNetUserGetLocalGroups        = modnetapi32.NewProc("NetUserGetLocalGroups")
+	procGetProcessMemoryInfo         = modpsapi.NewProc("GetProcessMemoryInfo")
+	procCreateEnvironmentBlock       = moduserenv.NewProc("CreateEnvironmentBlock")
+	procDestroyEnvironmentBlock      = moduserenv.NewProc("DestroyEnvironmentBlock")
+	procGetProfilesDirectoryW        = moduserenv.NewProc("GetProfilesDirectoryW")
+	procWSASocketW                   = modws2_32.NewProc("WSASocketW")
+)
+
+// adjustTokenPrivileges calls the advapi32.dll procedure
+// "AdjustTokenPrivileges". disableAllPrivileges is passed as a 32-bit
+// 0 or 1. Unlike most wrappers in this file, err is assigned from the
+// call's Errno unconditionally (through errnoErr, which never returns
+// nil), so err on its own does not distinguish success from failure;
+// ret carries the procedure's result.
+func adjustTokenPrivileges(token syscall.Token, disableAllPrivileges bool, newstate *TOKEN_PRIVILEGES, buflen uint32, prevstate *TOKEN_PRIVILEGES, returnlen *uint32) (ret uint32, err error) {
+	var _p0 uint32
+	if disableAllPrivileges {
+		_p0 = 1
+	}
+	r0, _, e1 := syscall.Syscall6(procAdjustTokenPrivileges.Addr(), 6, uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(newstate)), uintptr(buflen), uintptr(unsafe.Pointer(prevstate)), uintptr(unsafe.Pointer(returnlen)))
+	ret = uint32(r0)
+	if true {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// DuplicateTokenEx calls the advapi32.dll procedure "DuplicateTokenEx".
+// A zero result is reported as an error built from the call's Errno
+// by errnoErr; otherwise err is nil.
+func DuplicateTokenEx(hExistingToken syscall.Token, dwDesiredAccess uint32, lpTokenAttributes *syscall.SecurityAttributes, impersonationLevel uint32, tokenType TokenType, phNewToken *syscall.Token) (err error) {
+	r1, _, e1 := syscall.Syscall6(procDuplicateTokenEx.Addr(), 6, uintptr(hExistingToken), uintptr(dwDesiredAccess), uintptr(unsafe.Pointer(lpTokenAttributes)), uintptr(impersonationLevel), uintptr(tokenType), uintptr(unsafe.Pointer(phNewToken)))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// ImpersonateSelf calls the advapi32.dll procedure "ImpersonateSelf"
+// with the given impersonation level. A zero result is reported as an
+// error built from the call's Errno by errnoErr.
+func ImpersonateSelf(impersonationlevel uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procImpersonateSelf.Addr(), 1, uintptr(impersonationlevel), 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// LookupPrivilegeValue calls the advapi32.dll procedure
+// "LookupPrivilegeValueW", passing systemname, name and luid as
+// pointers. A zero result is reported as an error built from the
+// call's Errno by errnoErr.
+func LookupPrivilegeValue(systemname *uint16, name *uint16, luid *LUID) (err error) {
+	r1, _, e1 := syscall.Syscall(procLookupPrivilegeValueW.Addr(), 3, uintptr(unsafe.Pointer(systemname)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(luid)))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// OpenThreadToken calls the advapi32.dll procedure "OpenThreadToken".
+// openasself is passed as a 32-bit 0 or 1. A zero result is reported
+// as an error built from the call's Errno by errnoErr.
+func OpenThreadToken(h syscall.Handle, access uint32, openasself bool, token *syscall.Token) (err error) {
+	var _p0 uint32
+	if openasself {
+		_p0 = 1
+	}
+	r1, _, e1 := syscall.Syscall6(procOpenThreadToken.Addr(), 4, uintptr(h), uintptr(access), uintptr(_p0), uintptr(unsafe.Pointer(token)), 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// RevertToSelf calls the advapi32.dll procedure "RevertToSelf", which
+// takes no arguments. A zero result is reported as an error built
+// from the call's Errno by errnoErr.
+func RevertToSelf() (err error) {
+	r1, _, e1 := syscall.Syscall(procRevertToSelf.Addr(), 0, 0, 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// SetTokenInformation calls the advapi32.dll procedure
+// "SetTokenInformation". tokenInformation is forwarded as a raw
+// uintptr. A zero result is reported as an error built from the
+// call's Errno by errnoErr.
+func SetTokenInformation(tokenHandle syscall.Token, tokenInformationClass uint32, tokenInformation uintptr, tokenInformationLength uint32) (err error) {
+	r1, _, e1 := syscall.Syscall6(procSetTokenInformation.Addr(), 4, uintptr(tokenHandle), uintptr(tokenInformationClass), uintptr(tokenInformation), uintptr(tokenInformationLength), 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// RtlGenRandom calls the advapi32.dll procedure "SystemFunction036",
+// passing a pointer to the first byte of buf (nil when buf is empty)
+// together with len(buf). A zero result is reported as an error built
+// from the call's Errno by errnoErr.
+func RtlGenRandom(buf []byte) (err error) {
+	var _p0 *byte
+	if len(buf) > 0 {
+		_p0 = &buf[0]
+	}
+	r1, _, e1 := syscall.Syscall(procSystemFunction036.Addr(), 2, uintptr(unsafe.Pointer(_p0)), uintptr(len(buf)), 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// GetAdaptersAddresses calls the iphlpapi.dll procedure
+// "GetAdaptersAddresses". The procedure's own result is the status:
+// a non-zero value is returned as a syscall.Errno in errcode, and the
+// Errno produced by the syscall itself is ignored.
+func GetAdaptersAddresses(family uint32, flags uint32, reserved uintptr, adapterAddresses *IpAdapterAddresses, sizePointer *uint32) (errcode error) {
+	r0, _, _ := syscall.Syscall6(procGetAdaptersAddresses.Addr(), 5, uintptr(family), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(adapterAddresses)), uintptr(unsafe.Pointer(sizePointer)), 0)
+	if r0 != 0 {
+		errcode = syscall.Errno(r0)
+	}
+	return
+}
+
+// GetACP calls the kernel32.dll procedure "GetACP" and returns its
+// result truncated to uint32. No error is reported.
+func GetACP() (acp uint32) {
+	r0, _, _ := syscall.Syscall(procGetACP.Addr(), 0, 0, 0, 0)
+	acp = uint32(r0)
+	return
+}
+
+// GetComputerNameEx calls the kernel32.dll procedure
+// "GetComputerNameExW" with nameformat and pointers to buf and n.
+// A zero result is reported as an error built from the call's Errno
+// by errnoErr.
+func GetComputerNameEx(nameformat uint32, buf *uint16, n *uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procGetComputerNameExW.Addr(), 3, uintptr(nameformat), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(n)))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// GetConsoleCP calls the kernel32.dll procedure "GetConsoleCP" and
+// returns its result truncated to uint32. No error is reported.
+func GetConsoleCP() (ccp uint32) {
+	r0, _, _ := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0)
+	ccp = uint32(r0)
+	return
+}
+
+// GetCurrentThread calls the kernel32.dll procedure "GetCurrentThread"
+// and returns its result as a syscall.Handle. err is set (from the
+// call's Errno via errnoErr) only when that handle is 0.
+func GetCurrentThread() (pseudoHandle syscall.Handle, err error) {
+	r0, _, e1 := syscall.Syscall(procGetCurrentThread.Addr(), 0, 0, 0, 0)
+	pseudoHandle = syscall.Handle(r0)
+	if pseudoHandle == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// GetFileInformationByHandleEx calls the kernel32.dll procedure
+// "GetFileInformationByHandleEx", passing info as the output buffer
+// pointer and bufsize as its size. A zero result is reported as an
+// error built from the call's Errno by errnoErr.
+func GetFileInformationByHandleEx(handle syscall.Handle, class uint32, info *byte, bufsize uint32) (err error) {
+	r1, _, e1 := syscall.Syscall6(procGetFileInformationByHandleEx.Addr(), 4, uintptr(handle), uintptr(class), uintptr(unsafe.Pointer(info)), uintptr(bufsize), 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// GetFinalPathNameByHandle calls the kernel32.dll procedure
+// "GetFinalPathNameByHandleW". n is the procedure's result truncated
+// to uint32; when n is 0, err is set from the call's Errno via
+// errnoErr.
+func GetFinalPathNameByHandle(file syscall.Handle, filePath *uint16, filePathSize uint32, flags uint32) (n uint32, err error) {
+	r0, _, e1 := syscall.Syscall6(procGetFinalPathNameByHandleW.Addr(), 4, uintptr(file), uintptr(unsafe.Pointer(filePath)), uintptr(filePathSize), uintptr(flags), 0, 0)
+	n = uint32(r0)
+	if n == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// GetModuleFileName calls the kernel32.dll procedure
+// "GetModuleFileNameW". n is the procedure's result truncated to
+// uint32; when n is 0, err is set from the call's Errno via errnoErr.
+// Note that the parameter named len shadows the builtin inside this
+// function.
+func GetModuleFileName(module syscall.Handle, fn *uint16, len uint32) (n uint32, err error) {
+	r0, _, e1 := syscall.Syscall(procGetModuleFileNameW.Addr(), 3, uintptr(module), uintptr(unsafe.Pointer(fn)), uintptr(len))
+	n = uint32(r0)
+	if n == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// LockFileEx calls the kernel32.dll procedure "LockFileEx" with all
+// six arguments forwarded in order. A zero result is reported as an
+// error built from the call's Errno by errnoErr.
+func LockFileEx(file syscall.Handle, flags uint32, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *syscall.Overlapped) (err error) {
+	r1, _, e1 := syscall.Syscall6(procLockFileEx.Addr(), 6, uintptr(file), uintptr(flags), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped)))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// MoveFileEx calls the kernel32.dll procedure "MoveFileExW" with
+// pointers to the from and to names and the given flags. A zero
+// result is reported as an error built from the call's Errno by
+// errnoErr.
+func MoveFileEx(from *uint16, to *uint16, flags uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// MultiByteToWideChar calls the kernel32.dll procedure
+// "MultiByteToWideChar". nwrite is the procedure's result converted
+// to int32; when nwrite is 0, err is set from the call's Errno via
+// errnoErr.
+func MultiByteToWideChar(codePage uint32, dwFlags uint32, str *byte, nstr int32, wchar *uint16, nwchar int32) (nwrite int32, err error) {
+	r0, _, e1 := syscall.Syscall6(procMultiByteToWideChar.Addr(), 6, uintptr(codePage), uintptr(dwFlags), uintptr(unsafe.Pointer(str)), uintptr(nstr), uintptr(unsafe.Pointer(wchar)), uintptr(nwchar))
+	nwrite = int32(r0)
+	if nwrite == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// SetFileInformationByHandle calls the kernel32.dll procedure
+// "SetFileInformationByHandle". buf is forwarded as a raw uintptr
+// along with bufsize. A zero result is reported as an error built
+// from the call's Errno by errnoErr.
+func SetFileInformationByHandle(handle syscall.Handle, fileInformationClass uint32, buf uintptr, bufsize uint32) (err error) {
+	r1, _, e1 := syscall.Syscall6(procSetFileInformationByHandle.Addr(), 4, uintptr(handle), uintptr(fileInformationClass), uintptr(buf), uintptr(bufsize), 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// UnlockFileEx calls the kernel32.dll procedure "UnlockFileEx" with
+// its five arguments forwarded in order. A zero result is reported as
+// an error built from the call's Errno by errnoErr.
+func UnlockFileEx(file syscall.Handle, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *syscall.Overlapped) (err error) {
+	r1, _, e1 := syscall.Syscall6(procUnlockFileEx.Addr(), 5, uintptr(file), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped)), 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// NetShareAdd calls the netapi32.dll procedure "NetShareAdd". The
+// procedure's own result is the status: a non-zero value is returned
+// as a syscall.Errno in neterr, and the Errno produced by the syscall
+// itself is ignored.
+func NetShareAdd(serverName *uint16, level uint32, buf *byte, parmErr *uint16) (neterr error) {
+	r0, _, _ := syscall.Syscall6(procNetShareAdd.Addr(), 4, uintptr(unsafe.Pointer(serverName)), uintptr(level), uintptr(unsafe.Pointer(buf)), uintptr(unsafe.Pointer(parmErr)), 0, 0)
+	if r0 != 0 {
+		neterr = syscall.Errno(r0)
+	}
+	return
+}
+
+// NetShareDel calls the netapi32.dll procedure "NetShareDel". A
+// non-zero result is returned as a syscall.Errno in neterr; the Errno
+// produced by the syscall itself is ignored.
+func NetShareDel(serverName *uint16, netName *uint16, reserved uint32) (neterr error) {
+	r0, _, _ := syscall.Syscall(procNetShareDel.Addr(), 3, uintptr(unsafe.Pointer(serverName)), uintptr(unsafe.Pointer(netName)), uintptr(reserved))
+	if r0 != 0 {
+		neterr = syscall.Errno(r0)
+	}
+	return
+}
+
+// NetUserGetLocalGroups calls the netapi32.dll procedure
+// "NetUserGetLocalGroups" with its eight arguments forwarded in
+// order. A non-zero result is returned as a syscall.Errno in neterr;
+// the Errno produced by the syscall itself is ignored.
+func NetUserGetLocalGroups(serverName *uint16, userName *uint16, level uint32, flags uint32, buf **byte, prefMaxLen uint32, entriesRead *uint32, totalEntries *uint32) (neterr error) {
+	r0, _, _ := syscall.Syscall9(procNetUserGetLocalGroups.Addr(), 8, uintptr(unsafe.Pointer(serverName)), uintptr(unsafe.Pointer(userName)), uintptr(level), uintptr(flags), uintptr(unsafe.Pointer(buf)), uintptr(prefMaxLen), uintptr(unsafe.Pointer(entriesRead)), uintptr(unsafe.Pointer(totalEntries)), 0)
+	if r0 != 0 {
+		neterr = syscall.Errno(r0)
+	}
+	return
+}
+
+// GetProcessMemoryInfo calls the psapi.dll procedure
+// "GetProcessMemoryInfo", passing memCounters as the output pointer
+// and cb as the accompanying size argument. A zero result is reported
+// as an error built from the call's Errno by errnoErr.
+func GetProcessMemoryInfo(handle syscall.Handle, memCounters *PROCESS_MEMORY_COUNTERS, cb uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procGetProcessMemoryInfo.Addr(), 3, uintptr(handle), uintptr(unsafe.Pointer(memCounters)), uintptr(cb))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// CreateEnvironmentBlock calls the userenv.dll procedure
+// "CreateEnvironmentBlock". inheritExisting is passed as a 32-bit 0
+// or 1. A zero result is reported as an error built from the call's
+// Errno by errnoErr.
+func CreateEnvironmentBlock(block **uint16, token syscall.Token, inheritExisting bool) (err error) {
+	var _p0 uint32
+	if inheritExisting {
+		_p0 = 1
+	}
+	r1, _, e1 := syscall.Syscall(procCreateEnvironmentBlock.Addr(), 3, uintptr(unsafe.Pointer(block)), uintptr(token), uintptr(_p0))
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// DestroyEnvironmentBlock calls the userenv.dll procedure
+// "DestroyEnvironmentBlock" on block. A zero result is reported as an
+// error built from the call's Errno by errnoErr.
+func DestroyEnvironmentBlock(block *uint16) (err error) {
+	r1, _, e1 := syscall.Syscall(procDestroyEnvironmentBlock.Addr(), 1, uintptr(unsafe.Pointer(block)), 0, 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// GetProfilesDirectory calls the userenv.dll procedure
+// "GetProfilesDirectoryW" with pointers to dir and dirLen. A zero
+// result is reported as an error built from the call's Errno by
+// errnoErr.
+func GetProfilesDirectory(dir *uint16, dirLen *uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procGetProfilesDirectoryW.Addr(), 2, uintptr(unsafe.Pointer(dir)), uintptr(unsafe.Pointer(dirLen)), 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// WSASocket calls the ws2_32.dll procedure "WSASocketW" and returns
+// its result as a syscall.Handle. When that handle equals
+// syscall.InvalidHandle, err is set from the call's Errno via
+// errnoErr.
+func WSASocket(af int32, typ int32, protocol int32, protinfo *syscall.WSAProtocolInfo, group uint32, flags uint32) (handle syscall.Handle, err error) {
+	r0, _, e1 := syscall.Syscall6(procWSASocketW.Addr(), 6, uintptr(af), uintptr(typ), uintptr(protocol), uintptr(unsafe.Pointer(protinfo)), uintptr(group), uintptr(flags))
+	handle = syscall.Handle(r0)
+	if handle == syscall.InvalidHandle {
+		err = errnoErr(e1)
+	}
+	return
+}
diff --git a/src/internal/sysinfo/sysinfo.go b/src/internal/sysinfo/sysinfo.go
new file mode 100644
index 0000000..961be7a
--- /dev/null
+++ b/src/internal/sysinfo/sysinfo.go
@@ -0,0 +1,31 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package sysinfo implements high level hardware information gathering
+// that can be used for debugging or information purposes.
+package sysinfo
+
+import (
+	internalcpu "internal/cpu"
+	"sync"
+)
+
+// cpuInfo caches CPU details that are looked up at most once.
+type cpuInfo struct {
+	once sync.Once // guards the single lookup of name
+	name string    // cached CPU name; empty if none was found
+}
+
+// CPU is the package-wide cpuInfo instance.
+var CPU cpuInfo
+
+// Name returns the CPU name, or the empty string if none could be
+// determined. The lookup runs on the first call only; later calls
+// return the cached result.
+func (cpu *cpuInfo) Name() string {
+	cpu.once.Do(func() {
+		// internal/cpu is the only source consulted so far.
+		found := internalcpu.Name()
+		if found == "" {
+			// TODO(martisch): use /proc/cpuinfo and /sys/devices/system/cpu/ on Linux as fallback.
+			return
+		}
+		cpu.name = found
+	})
+	return cpu.name
+}
diff --git a/src/internal/testenv/testenv.go b/src/internal/testenv/testenv.go
new file mode 100644
index 0000000..c902b14
--- /dev/null
+++ b/src/internal/testenv/testenv.go
@@ -0,0 +1,308 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package testenv provides information about what functionality
+// is available in different testing environments run by the Go team.
+//
+// It is an internal package because these details are specific
+// to the Go team's test setup (on build.golang.org) and not
+// fundamental to tests in general.
+package testenv
+
+import (
+	"errors"
+	"flag"
+	"internal/cfg"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"testing"
+)
+
+// Builder returns the name of the builder on which this test is
+// running (for example, "linux-amd64" or "windows-386-gce"), as read
+// from the GO_BUILDER_NAME environment variable.
+// Outside the build infrastructure the result is the empty string.
+func Builder() string {
+	builderName := os.Getenv("GO_BUILDER_NAME")
+	return builderName
+}
+
+// HasGoBuild reports whether the current system can build programs with ``go build''
+// and then run them with os.StartProcess or exec.Command.
+func HasGoBuild() bool {
+	if gcflags := os.Getenv("GO_GCFLAGS"); gcflags != "" {
+		// Requiring every caller of the go command to forward
+		// "-gcflags="+os.Getenv("GO_GCFLAGS") is too much work,
+		// so for now a non-empty $GO_GCFLAGS simply means that
+		// go build cannot be run.
+		return false
+	}
+	goos := runtime.GOOS
+	return goos != "android" && goos != "js" && goos != "ios"
+}
+
+// MustHaveGoBuild checks that the current system can build programs with ``go build''
+// and then run them with os.StartProcess or exec.Command.
+// If not, MustHaveGoBuild calls t.Skip with an explanation.
+func MustHaveGoBuild(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	// Checked separately from HasGoBuild so that the skip message can
+	// name $GO_GCFLAGS as the cause.
+	if os.Getenv("GO_GCFLAGS") != "" {
+		t.Skipf("skipping test: 'go build' not compatible with setting $GO_GCFLAGS")
+	}
+	if !HasGoBuild() {
+		t.Skipf("skipping test: 'go build' not available on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+}
+
+// HasGoRun reports whether the current system can run programs with ``go run.''
+func HasGoRun() bool {
+	// ``go run'' currently has the same requirements as ``go build''.
+	canBuild := HasGoBuild()
+	return canBuild
+}
+
+// MustHaveGoRun checks that the current system can run programs with ``go run.''
+// If not, MustHaveGoRun calls t.Skip with an explanation.
+func MustHaveGoRun(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	if !HasGoRun() {
+		t.Skipf("skipping test: 'go run' not available on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+}
+
+// GoToolPath reports the path to the Go tool.
+// It is a convenience wrapper around GoTool.
+// If the tool is unavailable GoToolPath calls t.Skip.
+// If the tool should be available and isn't, GoToolPath calls t.Fatal.
+func GoToolPath(t testing.TB) string {
+	// Mark this function as a helper so a skip or failure is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	MustHaveGoBuild(t)
+	path, err := GoTool()
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Add all environment variables that affect the Go command to test metadata.
+	// Cached test results will be invalidated when these variables change.
+	// See golang.org/issue/32285.
+	for _, envVar := range strings.Fields(cfg.KnownEnv) {
+		// The value is discarded on purpose: only the lookup itself matters here.
+		os.Getenv(envVar)
+	}
+	return path
+}
+
+// GoTool reports the path to the Go tool. It prefers the binary under
+// GOROOT/bin and falls back to searching PATH; an error is returned
+// when the platform cannot run the tool or no binary is found.
+func GoTool() (string, error) {
+	if !HasGoBuild() {
+		return "", errors.New("platform cannot run go tool")
+	}
+	exeName := "go"
+	if runtime.GOOS == "windows" {
+		exeName += ".exe"
+	}
+	// First choice: the tool that belongs to this GOROOT.
+	inRoot := filepath.Join(runtime.GOROOT(), "bin", exeName)
+	if _, statErr := os.Stat(inRoot); statErr == nil {
+		return inRoot, nil
+	}
+	// Otherwise take whatever PATH offers.
+	onPath, lookErr := exec.LookPath(exeName)
+	if lookErr != nil {
+		return "", errors.New("cannot find go tool: " + lookErr.Error())
+	}
+	return onPath, nil
+}
+
+// HasExec reports whether the current system can start new processes
+// using os.StartProcess or (more commonly) exec.Command.
+// It is false only on js and ios.
+func HasExec() bool {
+	goos := runtime.GOOS
+	return goos != "js" && goos != "ios"
+}
+
+// HasSrc reports whether the entire source tree is available under GOROOT.
+// It is false only on ios.
+func HasSrc() bool {
+	return runtime.GOOS != "ios"
+}
+
+// MustHaveExec checks that the current system can start new processes
+// using os.StartProcess or (more commonly) exec.Command.
+// If not, MustHaveExec calls t.Skip with an explanation.
+func MustHaveExec(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	if !HasExec() {
+		t.Skipf("skipping test: cannot exec subprocess on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+}
+
+// execPaths caches the result of exec.LookPath per executable name,
+// so each name is normally looked up only once per process.
+var execPaths sync.Map // path -> error
+
+// MustHaveExecPath checks that the current system can start the named executable
+// using os.StartProcess or (more commonly) exec.Command.
+// If not, MustHaveExecPath calls t.Skip with an explanation.
+func MustHaveExecPath(t testing.TB, path string) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	MustHaveExec(t)
+
+	err, found := execPaths.Load(path)
+	if !found {
+		_, err = exec.LookPath(path)
+		// LoadOrStore keeps whichever result was stored first if
+		// another goroutine looked up the same path concurrently.
+		err, _ = execPaths.LoadOrStore(path, err)
+	}
+	if err != nil {
+		t.Skipf("skipping test: %s: %s", path, err)
+	}
+}
+
+// HasExternalNetwork reports whether the current system can use
+// external (non-localhost) networks. It is false in -short mode and
+// on js.
+func HasExternalNetwork() bool {
+	if testing.Short() {
+		return false
+	}
+	return runtime.GOOS != "js"
+}
+
+// MustHaveExternalNetwork checks that the current system can use
+// external (non-localhost) networks.
+// If not, MustHaveExternalNetwork calls t.Skip with an explanation.
+func MustHaveExternalNetwork(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	if runtime.GOOS == "js" {
+		t.Skipf("skipping test: no external network on %s", runtime.GOOS)
+	}
+	if testing.Short() {
+		t.Skipf("skipping test: no external network in -short mode")
+	}
+}
+
+// haveCGO records whether cgo is available. It is false unless an
+// init function in a cgo-only file of this package sets it.
+var haveCGO bool
+
+// HasCGO reports whether the current system can use cgo.
+func HasCGO() bool {
+	return haveCGO
+}
+
+// MustHaveCGO calls t.Skip if cgo is not available.
+func MustHaveCGO(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	if !haveCGO {
+		t.Skipf("skipping test: no cgo")
+	}
+}
+
+// CanInternalLink reports whether the current system can link programs with
+// internal linking.
+// (This is the opposite of cmd/internal/sys.MustLinkExternal. Keep them in sync.)
+func CanInternalLink() bool {
+	goos, goarch := runtime.GOOS, runtime.GOARCH
+	// android supports internal linking on arm64 only.
+	if goos == "android" && goarch != "arm64" {
+		return false
+	}
+	// ios does not support it on arm64.
+	if goos == "ios" && goarch == "arm64" {
+		return false
+	}
+	return true
+}
+
+// MustInternalLink checks that the current system can link programs with internal
+// linking.
+// If not, MustInternalLink calls t.Skip with an explanation.
+func MustInternalLink(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	if !CanInternalLink() {
+		t.Skipf("skipping test: internal linking on %s/%s is not supported", runtime.GOOS, runtime.GOARCH)
+	}
+}
+
+// HasSymlink reports whether the current system can use os.Symlink.
+// The explanatory reason produced by hasSymlink is dropped.
+func HasSymlink() bool {
+	supported, _ := hasSymlink()
+	return supported
+}
+
+// MustHaveSymlink checks that the current system can use os.Symlink.
+// If not, MustHaveSymlink calls t.Skip with an explanation.
+func MustHaveSymlink(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	ok, reason := hasSymlink()
+	if !ok {
+		// reason is either empty or already starts with ": ", so it is
+		// appended directly after the GOOS/GOARCH pair.
+		t.Skipf("skipping test: cannot make symlinks on %s/%s%s", runtime.GOOS, runtime.GOARCH, reason)
+	}
+}
+
+// HasLink reports whether the current system can use os.Link.
+func HasLink() bool {
+	switch runtime.GOOS {
+	case "plan9":
+		return false
+	case "android":
+		// From Android release M (Marshmallow), hard linking files is blocked
+		// and an attempt to call link() on a file will return EACCES.
+		// - https://code.google.com/p/android-developer-preview/issues/detail?id=3150
+		return false
+	}
+	return true
+}
+
+// MustHaveLink checks that the current system can use os.Link.
+// If not, MustHaveLink calls t.Skip with an explanation.
+func MustHaveLink(t testing.TB) {
+	// Mark this function as a helper so the skip message is reported
+	// at the calling test's line rather than inside this package.
+	t.Helper()
+	if !HasLink() {
+		t.Skipf("skipping test: hardlinks are not supported on %s/%s", runtime.GOOS, runtime.GOARCH)
+	}
+}
+
+// flaky is the -flaky command-line flag; when set, tests that are
+// known to be flaky are run instead of skipped.
+var flaky = flag.Bool("flaky", false, "run known-flaky tests too")
+
+// SkipFlaky skips t unless the -flaky flag was given. issue is the
+// golang.org issue number tracking the flakiness and is included in
+// the skip message.
+func SkipFlaky(t testing.TB, issue int) {
+	t.Helper()
+	if *flaky {
+		return
+	}
+	t.Skipf("skipping known flaky test without the -flaky flag; see golang.org/issue/%d", issue)
+}
+
+// SkipFlakyNet skips t when the GO_BUILDER_FLAKY_NET environment
+// variable parses as a true boolean. An unset or unparsable value
+// leaves the test running.
+func SkipFlakyNet(t testing.TB) {
+	t.Helper()
+	// A parse error yields false, which is the desired default,
+	// so the error itself is not inspected.
+	flakyNet, _ := strconv.ParseBool(os.Getenv("GO_BUILDER_FLAKY_NET"))
+	if !flakyNet {
+		return
+	}
+	t.Skip("skipping test on builder known to have frequent network failures")
+}
+
+// CleanCmdEnv populates cmd.Env with a copy of the current process
+// environment, leaving out GODEBUG and GOTRACEBACK because they could
+// change the behavior of the Go tools. It panics if cmd.Env has
+// already been set, and returns cmd to allow chaining.
+func CleanCmdEnv(cmd *exec.Cmd) *exec.Cmd {
+	if cmd.Env != nil {
+		panic("environment already set")
+	}
+	for _, kv := range os.Environ() {
+		switch {
+		case strings.HasPrefix(kv, "GODEBUG="):
+			// Its output would break tests that are trying to
+			// parse other command output.
+			continue
+		case strings.HasPrefix(kv, "GOTRACEBACK="):
+			// Dropped for the same reason.
+			continue
+		}
+		cmd.Env = append(cmd.Env, kv)
+	}
+	return cmd
+}
+
+// CPUIsSlow reports whether the CPU running the test is suspected to be slow.
+// The decision is based solely on runtime.GOARCH.
+func CPUIsSlow() bool {
+	slowArches := [...]string{"arm", "mips", "mipsle", "mips64", "mips64le"}
+	for _, arch := range slowArches {
+		if runtime.GOARCH == arch {
+			return true
+		}
+	}
+	return false
+}
+
+// SkipIfShortAndSlow skips t when -short is set and the CPU running
+// the test is suspected to be slow.
+//
+// (This is useful for CPU-intensive tests that otherwise complete quickly.)
+func SkipIfShortAndSlow(t testing.TB) {
+	if !testing.Short() || !CPUIsSlow() {
+		return
+	}
+	t.Helper()
+	t.Skipf("skipping test in -short mode on %s", runtime.GOARCH)
+}
diff --git a/src/internal/testenv/testenv_cgo.go b/src/internal/testenv/testenv_cgo.go
new file mode 100644
index 0000000..02f08f5
--- /dev/null
+++ b/src/internal/testenv/testenv_cgo.go
@@ -0,0 +1,12 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build cgo
+// +build cgo
+
+package testenv
+
+// init records that cgo is available. This file carries a cgo build
+// constraint, so the assignment only happens in cgo-enabled builds.
+func init() {
+	haveCGO = true
+}
diff --git a/src/internal/testenv/testenv_notwin.go b/src/internal/testenv/testenv_notwin.go
new file mode 100644
index 0000000..846ec93
--- /dev/null
+++ b/src/internal/testenv/testenv_notwin.go
@@ -0,0 +1,21 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !windows
+// +build !windows
+
+package testenv
+
+import (
+	"runtime"
+)
+
+// hasSymlink reports whether os.Symlink is usable on this non-Windows
+// system. Symlinks are treated as unavailable on android and plan9.
+// The reason string is always empty here.
+func hasSymlink() (ok bool, reason string) {
+	if goos := runtime.GOOS; goos == "android" || goos == "plan9" {
+		return false, ""
+	}
+	return true, ""
+}
diff --git a/src/internal/testenv/testenv_windows.go b/src/internal/testenv/testenv_windows.go
new file mode 100644
index 0000000..4802b13
--- /dev/null
+++ b/src/internal/testenv/testenv_windows.go
@@ -0,0 +1,47 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testenv
+
+import (
+	"os"
+	"path/filepath"
+	"sync"
+	"syscall"
+)
+
+var (
+	// symlinkOnce guards the single run of initWinHasSymlink.
+	symlinkOnce sync.Once
+	// winSymlinkErr is the recognized reason symlinks are unusable,
+	// or nil if the probe found no such reason.
+	winSymlinkErr error
+)
+
+// initWinHasSymlink probes symlink support by trying to create one in
+// a fresh temporary directory, which is removed again afterwards.
+// Only the EWINDOWS and ERROR_PRIVILEGE_NOT_HELD failures are recorded
+// in winSymlinkErr; any other outcome leaves it nil. The function
+// panics if the temporary directory cannot be created.
+func initWinHasSymlink() {
+	probeDir, mkErr := os.MkdirTemp("", "symtest")
+	if mkErr != nil {
+		panic("failed to create temp directory: " + mkErr.Error())
+	}
+	defer os.RemoveAll(probeDir)
+
+	linkErr := os.Symlink("target", filepath.Join(probeDir, "symlink"))
+	if linkErr == nil {
+		return
+	}
+	// Unwrap the *os.LinkError to reach the underlying error value.
+	cause := linkErr.(*os.LinkError).Err
+	if cause == syscall.EWINDOWS || cause == syscall.ERROR_PRIVILEGE_NOT_HELD {
+		winSymlinkErr = cause
+	}
+}
+
+// hasSymlink reports whether os.Symlink is usable on this Windows
+// system. The probe runs once; later calls reuse its result. When ok
+// is false, reason is either empty or a ": "-prefixed explanation
+// suitable for appending to a skip message.
+func hasSymlink() (ok bool, reason string) {
+	symlinkOnce.Do(initWinHasSymlink)
+
+	if winSymlinkErr == nil {
+		return true, ""
+	}
+	if winSymlinkErr == syscall.EWINDOWS {
+		return false, ": symlinks are not supported on your version of Windows"
+	}
+	if winSymlinkErr == syscall.ERROR_PRIVILEGE_NOT_HELD {
+		return false, ": you don't have enough privileges to create symlinks"
+	}
+
+	return false, ""
+}
diff --git a/src/internal/testlog/exit.go b/src/internal/testlog/exit.go
new file mode 100644
index 0000000..e15defd
--- /dev/null
+++ b/src/internal/testlog/exit.go
@@ -0,0 +1,33 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testlog
+
+import "sync"
+
+// PanicOnExit0 reports whether to panic on a call to os.Exit(0).
+// It lives in package testlog because, like the other definitions
+// here, it is a hook between the testing package and the os package.
+// It is used to ensure that an early call to os.Exit(0) does not
+// cause a test to pass.
+func PanicOnExit0() bool {
+	panicOnExit0.mu.Lock()
+	current := panicOnExit0.val
+	panicOnExit0.mu.Unlock()
+	return current
+}
+
+// panicOnExit0 holds the flag behind PanicOnExit0. It is protected by
+// a lock because the value can be cleared via a timer call that may
+// race with calls to os.Exit.
+var panicOnExit0 struct {
+	mu  sync.Mutex
+	val bool
+}
+
+// SetPanicOnExit0 sets panicOnExit0 to v.
+func SetPanicOnExit0(v bool) {
+	panicOnExit0.mu.Lock()
+	panicOnExit0.val = v
+	panicOnExit0.mu.Unlock()
+}
diff --git a/src/internal/testlog/log.go b/src/internal/testlog/log.go
new file mode 100644
index 0000000..3c5f780
--- /dev/null
+++ b/src/internal/testlog/log.go
@@ -0,0 +1,69 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package testlog provides a back-channel communication path
+// between tests and package os, so that cmd/go can see which
+// environment variables and files a test consults.
+package testlog
+
+import "sync/atomic"
+
+// Interface is what a test logger must implement.
+// Package os calls these methods to signal that it is inspecting the
+// given environment variable or file.
+// The methods may be called from multiple goroutines simultaneously.
+type Interface interface {
+	Getenv(key string)
+	Stat(file string)
+	Open(file string)
+	Chdir(dir string)
+}
+
+// logger holds the current logger as a *Interface.
+// An atomic.Value is used in case test startup is racing with
+// goroutines started during init. That must not trip the race
+// detector, although visibility into exactly what those goroutines
+// do will still be limited.
+var logger atomic.Value
+
+// SetLogger sets the test logger implementation for the current process.
+// It must be called only once, at process startup; a second call panics.
+func SetLogger(impl Interface) {
+	if prev := logger.Load(); prev != nil {
+		panic("testlog: SetLogger must be called only once")
+	}
+	// Store a pointer so the atomic.Value always holds the same
+	// concrete type, whatever implementation is supplied.
+	logger.Store(&impl)
+}
+
+// Logger returns the current test logger implementation.
+// It returns nil if there is no logger.
+func Logger() Interface {
+	if stored, ok := logger.Load().(*Interface); ok {
+		return *stored
+	}
+	return nil
+}
+
+// Getenv forwards name to Logger().Getenv. It does nothing when no
+// logger has been set.
+func Getenv(name string) {
+	current := Logger()
+	if current == nil {
+		return
+	}
+	current.Getenv(name)
+}
+
+// Open forwards name to Logger().Open. It does nothing when no
+// logger has been set.
+func Open(name string) {
+	current := Logger()
+	if current == nil {
+		return
+	}
+	current.Open(name)
+}
+
+// Stat forwards name to Logger().Stat. It does nothing when no
+// logger has been set.
+func Stat(name string) {
+	current := Logger()
+	if current == nil {
+		return
+	}
+	current.Stat(name)
+}
diff --git a/src/internal/trace/gc.go b/src/internal/trace/gc.go
new file mode 100644
index 0000000..cc19fdf
--- /dev/null
+++ b/src/internal/trace/gc.go
@@ -0,0 +1,825 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"container/heap"
+	"math"
+	"sort"
+	"strings"
+	"time"
+)
+
+// MutatorUtil is a change in mutator utilization at a particular
+// time. Mutator utilization functions are represented as a
+// time-ordered []MutatorUtil.
+type MutatorUtil struct {
+	Time int64 // time of the change; MutatorUtilization fills this from Event.Ts
+	// Util is the mean mutator utilization starting at Time. This
+	// is in the range [0, 1].
+	Util float64
+}
+
+// UtilFlags is a bit set that controls the behavior of MutatorUtilization; combine flags with |.
+type UtilFlags int
+
+const (
+	// UtilSTW means utilization should account for STW events.
+	UtilSTW UtilFlags = 1 << iota // 1 << 0
+	// UtilBackground means utilization should account for
+	// background mark workers.
+	UtilBackground // 1 << 1
+	// UtilAssist means utilization should account for mark
+	// assists.
+	UtilAssist // 1 << 2
+	// UtilSweep means utilization should account for sweeping.
+	UtilSweep // 1 << 3
+
+	// UtilPerProc means each P should be given a separate
+	// utilization function. Otherwise, there is a single function
+	// and each P is given a fraction of the utilization.
+	UtilPerProc // 1 << 4
+)
+
+// MutatorUtilization returns a set of mutator utilization functions
+// for the given trace, or nil if events is empty. Each function will
+// always end with 0 utilization. The bounds of each function are implicit
+// in the first and last event; outside of these bounds each function is undefined.
+//
+// If the UtilPerProc flag is not given, this always returns a single
+// utilization function. Otherwise, it returns one function per P.
+func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
+	if len(events) == 0 {
+		return nil
+	}
+
+	type perP struct {
+		// gc > 0 indicates that GC is active on this P.
+		gc int
+		// series is the logical series number for this P. This
+		// is necessary because Ps may be removed and then
+		// re-added, and then the new P needs a new series.
+		series int
+	}
+	ps := []perP{} // per-P state, indexed by P number (ev.P)
+	stw := 0       // number of STW phases in progress; only counted when UtilSTW is set
+
+	out := [][]MutatorUtil{}     // result; out[p.series] is the function a P contributes to
+	assists := map[uint64]bool{} // goroutines (keyed by ev.G) currently inside a mark assist
+	block := map[uint64]*Event{} // for each started goroutine, the event linked from its start (ev.Link)
+	bgMark := map[uint64]bool{}  // goroutines currently counted as background mark workers
+
+	for _, ev := range events {
+		switch ev.Type {
+		case EvGomaxprocs:
+			gomaxprocs := int(ev.Args[0])
+			if len(ps) > gomaxprocs {
+				if flags&UtilPerProc != 0 {
+					// End each P's series.
+					for _, p := range ps[gomaxprocs:] {
+						out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
+					}
+				}
+				ps = ps[:gomaxprocs]
+			}
+			for len(ps) < gomaxprocs {
+				// Start new P's series.
+				series := 0
+				if flags&UtilPerProc != 0 || len(out) == 0 {
+					series = len(out)
+					out = append(out, []MutatorUtil{{ev.Ts, 1}})
+				}
+				ps = append(ps, perP{series: series})
+			}
+		case EvGCSTWStart:
+			if flags&UtilSTW != 0 {
+				stw++
+			}
+		case EvGCSTWDone:
+			if flags&UtilSTW != 0 {
+				stw--
+			}
+		case EvGCMarkAssistStart:
+			if flags&UtilAssist != 0 {
+				ps[ev.P].gc++ // NOTE(review): ps is indexed by ev.P with no bounds check; assumes ev.P < len(ps) here and below
+				assists[ev.G] = true
+			}
+		case EvGCMarkAssistDone:
+			if flags&UtilAssist != 0 {
+				ps[ev.P].gc--
+				delete(assists, ev.G)
+			}
+		case EvGCSweepStart:
+			if flags&UtilSweep != 0 {
+				ps[ev.P].gc++
+			}
+		case EvGCSweepDone:
+			if flags&UtilSweep != 0 {
+				ps[ev.P].gc--
+			}
+		case EvGoStartLabel:
+			if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
+				// Background mark worker.
+				//
+				// If we're in per-proc mode, we don't
+				// count dedicated workers because
+				// they kick all of the goroutines off
+				// that P, so don't directly
+				// contribute to goroutine latency.
+				if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
+					bgMark[ev.G] = true
+					ps[ev.P].gc++
+				}
+			}
+			fallthrough // a labeled start is then handled like a plain EvGoStart
+		case EvGoStart:
+			if assists[ev.G] {
+				// Unblocked during assist.
+				ps[ev.P].gc++
+			}
+			block[ev.G] = ev.Link
+		default:
+			if ev != block[ev.G] { // act only on the event recorded from ev.Link when this goroutine last started
+				continue
+			}
+
+			if assists[ev.G] {
+				// Blocked during assist.
+				ps[ev.P].gc--
+			}
+			if bgMark[ev.G] {
+				// Background mark worker done.
+				ps[ev.P].gc--
+				delete(bgMark, ev.G)
+			}
+			delete(block, ev.G)
+		}
+
+		if flags&UtilPerProc == 0 {
+			// Compute the current average utilization.
+			if len(ps) == 0 {
+				continue
+			}
+			gcPs := 0
+			if stw > 0 {
+				gcPs = len(ps)
+			} else {
+				for i := range ps {
+					if ps[i].gc > 0 {
+						gcPs++
+					}
+				}
+			}
+			mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
+
+			// Record the utilization change. (Since len(ps) > 0, adding
+			// the first P already created series 0, so len(out) > 0.)
+			out[0] = addUtil(out[0], mu)
+		} else {
+			// Check for per-P utilization changes.
+			for i := range ps {
+				p := &ps[i]
+				util := 1.0
+				if stw > 0 || p.gc > 0 {
+					util = 0.0
+				}
+				out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
+			}
+		}
+	}
+
+	// Add final 0 utilization event to any remaining series. This
+	// is important to mark the end of the trace. The exact value
+	// shouldn't matter since no window should extend beyond this,
+	// but using 0 is symmetric with the start of the trace.
+	mu := MutatorUtil{events[len(events)-1].Ts, 0}
+	for i := range ps {
+		out[ps[i].series] = addUtil(out[ps[i].series], mu)
+	}
+	return out
+}
+
+// addUtil records mu at the end of util, dropping it when the utilization is unchanged.
+func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
+	if len(util) == 0 {
+		return append(util, mu)
+	}
+	last := &util[len(util)-1]
+	switch {
+	case mu.Util == last.Util: // No change.
+	case mu.Time != last.Time:
+		return append(util, mu)
+	case mu.Util < last.Util:
+		// Take the lowest utilization at a time stamp.
+		*last = mu
+	}
+	return util
+}
+
+// totalUtil is utilization integrated over time, measured in nanoseconds. It is a
+// separate type so it is not confused with mean utilization, also a float64.
+type totalUtil float64
+
+// totalUtilOf returns the total utilization of dur nanoseconds spent at meanUtil.
+func totalUtilOf(meanUtil float64, dur int64) totalUtil {
+	return totalUtil(meanUtil) * totalUtil(dur)
+}
+
+// mean spreads u over dur and returns the resulting mean utilization.
+func (u totalUtil) mean(dur time.Duration) float64 {
+	return float64(u / totalUtil(dur))
+}
+
+// An MMUCurve is the minimum mutator utilization curve across
+// multiple window sizes.
+type MMUCurve struct {
+	series []mmuSeries // one summarized series per utilization function given to NewMMUCurve
+}
+
+type mmuSeries struct { // one utilization function plus the summaries precomputed by newMMUSeries
+	util []MutatorUtil
+	// sums[j] is the cumulative sum of util[:j], i.e. the total utilization accumulated before util[j].Time.
+	sums []totalUtil
+	// bands summarizes util in non-overlapping bands of duration
+	// bandDur.
+	bands []mmuBand
+	// bandDur is the duration of each band.
+	bandDur int64
+}
+
+type mmuBand struct { // summary of one band of an mmuSeries
+	// minUtil is the minimum instantaneous mutator utilization in
+	// this band.
+	minUtil float64
+	// cumUtil is the cumulative total mutator utilization between
+	// time 0 and the left edge of this band.
+	cumUtil totalUtil
+
+	// integrator is the integrator for the left edge of this
+	// band; bandMMU copies it as the starting point for both window edges.
+	integrator integrator
+}
+
+// NewMMUCurve builds an MMU curve from the given mutator utilization
+// functions, summarizing each of them as its own series.
+func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
+	c := &MMUCurve{series: make([]mmuSeries, 0, len(utils))}
+	for _, u := range utils {
+		c.series = append(c.series, newMMUSeries(u))
+	}
+	return c
+}
+
+// bandsPerSeries is the number of bands to divide each series into (newMMUSeries uses fewer when a series has fewer points).
+// This is only changed by tests.
+var bandsPerSeries = 1000
+
+func newMMUSeries(util []MutatorUtil) mmuSeries { // precomputes the cumulative sums and band summaries for util
+	// Compute cumulative sum.
+	sums := make([]totalUtil, len(util))
+	var prev MutatorUtil // zero value, so the first interval contributes no utilization
+	var sum totalUtil
+	for j, u := range util {
+		sum += totalUtilOf(prev.Util, u.Time-prev.Time)
+		sums[j] = sum
+		prev = u
+	}
+
+	// Divide the utilization curve up into equal size
+	// non-overlapping "bands" and compute a summary for each of
+	// these bands.
+	//
+	// Compute the duration of each band.
+	numBands := bandsPerSeries
+	if numBands > len(util) {
+		// There's no point in having lots of bands if there
+		// aren't many events.
+		numBands = len(util)
+	}
+	dur := util[len(util)-1].Time - util[0].Time // NOTE: indexes the last element, so util must be non-empty
+	bandDur := (dur + int64(numBands) - 1) / int64(numBands) // dur / numBands, rounded up
+	if bandDur < 1 {
+		bandDur = 1
+	}
+	// Compute the bands. There are numBands+1 bands in order to
+	// record the final cumulative sum.
+	bands := make([]mmuBand, numBands+1)
+	s := mmuSeries{util, sums, bands, bandDur}
+	leftSum := integrator{&s, 0}
+	for i := range bands {
+		startTime, endTime := s.bandTime(i)
+		cumUtil := leftSum.advance(startTime)
+		predIdx := leftSum.pos
+		minUtil := 1.0
+		for i := predIdx; i < len(util) && util[i].Time < endTime; i++ { // this i indexes util and shadows the band index
+			minUtil = math.Min(minUtil, util[i].Util)
+		}
+		bands[i] = mmuBand{minUtil, cumUtil, leftSum} // stores a copy of leftSum as positioned at startTime
+	}
+
+	return s
+}
+
+// bandTime returns the start and end time of band i, counting bands of bandDur from util[0].Time.
+func (s *mmuSeries) bandTime(i int) (start, end int64) {
+	start = s.util[0].Time + int64(i)*s.bandDur
+	return start, start + s.bandDur
+}
+
+// bandUtil is a heap entry describing one band of one utilization series.
+type bandUtil struct {
+	series    int     // utilization series index
+	i         int     // band index
+	utilBound float64 // lower bound of mutator utilization for all windows with a left edge in this band
+}
+
+// bandUtilHeap is a heap.Interface over bandUtils whose root is the
+// entry with the smallest utilBound.
+type bandUtilHeap []bandUtil
+
+// Len, Less and Swap provide the sort.Interface part of heap.Interface,
+// ordering entries by ascending utilBound.
+func (h bandUtilHeap) Len() int { return len(h) }
+
+func (h bandUtilHeap) Less(i, j int) bool { return h[i].utilBound < h[j].utilBound }
+
+func (h bandUtilHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
+
+// Push appends x, which must be a bandUtil, to the end of the slice.
+// Use heap.Push to insert while keeping heap order.
+func (h *bandUtilHeap) Push(x interface{}) {
+	*h = append(*h, x.(bandUtil))
+}
+
+// Pop removes the last element of the slice and returns it.
+// Use heap.Pop to remove the root while keeping heap order.
+func (h *bandUtilHeap) Pop() interface{} {
+	old := *h
+	last := len(old) - 1
+	*h = old[:last]
+	return old[last]
+}
+
+// UtilWindow is a specific window at Time.
+type UtilWindow struct {
+	Time        int64   // time of the window, as passed to addMU
+	MutatorUtil float64 // mean mutator utilization in this window
+}
+
+// utilHeap is a heap.Interface over UtilWindows whose root has the highest MutatorUtil.
+type utilHeap []UtilWindow
+
+func (h utilHeap) Len() int { return len(h) }
+
+// Less orders windows by descending MutatorUtil and, for equal
+// utilization, by descending Time.
+func (h utilHeap) Less(i, j int) bool {
+	a, b := h[i], h[j]
+	if a.MutatorUtil == b.MutatorUtil {
+		return a.Time > b.Time
+	}
+	return a.MutatorUtil > b.MutatorUtil
+}
+
+func (h utilHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
+
+// Push appends x, which must be a UtilWindow; use heap.Push to keep heap order.
+func (h *utilHeap) Push(x interface{}) {
+	*h = append(*h, x.(UtilWindow))
+}
+
+// Pop removes and returns the last element; use heap.Pop to keep heap order.
+func (h *utilHeap) Pop() interface{} {
+	old, last := *h, len(*h)-1
+	*h = old[:last]
+	return old[last]
+}
+
+// An accumulator takes a windowed mutator utilization function and
+// tracks various statistics for that function.
+type accumulator struct {
+	mmu float64 // lowest utilization passed to addMU so far (callers start it at 1.0)
+
+	// bound is the mutator utilization bound where adding any
+	// mutator utilization above this bound cannot affect the
+	// accumulated statistics.
+	bound float64
+
+	// Worst N window tracking
+	nWorst int      // number of worst windows to keep; 0 disables this tracking
+	wHeap  utilHeap // windows kept so far, at most nWorst of them
+
+	// Mutator utilization distribution tracking
+	mud *mud // nil when no distribution is being collected
+	// preciseMass is the distribution mass that must be precise
+	// before accumulation is stopped.
+	preciseMass float64
+	// lastTime and lastMU are the previous point added to the
+	// windowed mutator utilization function.
+	lastTime int64
+	lastMU   float64
+}
+
+// resetTime declares a discontinuity in the windowed mutator
+// utilization function by resetting the current time.
+func (acc *accumulator) resetTime() {
+	// This only matters for distribution collection, since that's
+	// the only thing that depends on the progression of the
+	// windowed mutator utilization function.
+	acc.lastTime = math.MaxInt64 // sentinel: addMU skips the distribution update while lastTime holds this value
+}
+
+// addMU adds a point to the windowed mutator utilization function at
+// (time, mu), where window is the window size. This must be called for
+// monotonically increasing values of time.
+//
+// It returns true if further calls to addMU would be pointless.
+func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
+	if mu < acc.mmu {
+		acc.mmu = mu
+	}
+	acc.bound = acc.mmu
+
+	if acc.nWorst == 0 {
+		// If the minimum has reached zero, it can't go any
+		// lower, so we can stop early.
+		return mu == 0
+	}
+
+	// Consider adding this window to the n worst.
+	if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
+		// This window is lower than the N'th worst window.
+		//
+		// Check if there's any overlapping window
+		// already in the heap and keep whichever is
+		// worse.
+		for i, ui := range acc.wHeap {
+			if time+int64(window) > ui.Time && ui.Time+int64(window) > time { // the two windows overlap
+				if ui.MutatorUtil <= mu {
+					// Keep the first window.
+					goto keep
+				} else {
+					// Replace it with this window.
+					heap.Remove(&acc.wHeap, i)
+					break // the heap was modified, so stop ranging over it
+				}
+			}
+		}
+
+		heap.Push(&acc.wHeap, UtilWindow{time, mu})
+		if len(acc.wHeap) > acc.nWorst {
+			heap.Pop(&acc.wHeap) // drops the root, which utilHeap orders as the highest utilization
+		}
+	keep:
+	}
+
+	if len(acc.wHeap) < acc.nWorst {
+		// We don't have N windows yet, so keep accumulating.
+		acc.bound = 1.0
+	} else {
+		// Anything above the least worst window has no effect.
+		acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
+	}
+
+	if acc.mud != nil {
+		if acc.lastTime != math.MaxInt64 { // MaxInt64 is the discontinuity marker set by resetTime
+			// Update distribution.
+			acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
+		}
+		acc.lastTime, acc.lastMU = time, mu
+		if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
+			acc.bound = math.Max(acc.bound, mudBound)
+		} else {
+			// We haven't accumulated enough total precise
+			// mass yet to even reach our goal, so keep
+			// accumulating.
+			acc.bound = 1
+		}
+		// It's not worth checking percentiles every time, so
+		// just keep accumulating this band.
+		return false
+	}
+
+	// If we've found enough 0 utilizations, we can stop immediately.
+	return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
+}
+
+// MMU computes the minimum mutator utilization for windows of the
+// given duration: the lowest mean utilization of any such window over
+// the execution. The result lies in the range [0, 1].
+func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
+	acc := &accumulator{mmu: 1.0, bound: 1.0}
+	c.mmu(window, acc)
+	mmu = acc.mmu
+	return mmu
+}
+
+// Examples returns up to n disjoint windows of the given size with the
+// lowest mutator utilization, ordered from lowest utilization upward.
+// Disjointness avoids reporting many near-identical windows around the
+// single lowest point; which disjoint set is returned is unspecified.
+func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
+	acc := &accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
+	c.mmu(window, acc)
+	sort.Sort(sort.Reverse(acc.wHeap))
+	worst = acc.wHeap
+	return worst
+}
+
+// MUD returns mutator utilization distribution quantiles for the
+// given window size; element i of the result corresponds to quantiles[i].
+//
+// The mutator utilization distribution is the distribution of mean
+// mutator utilization across all windows of the given window size in
+// the trace.
+//
+// The minimum mutator utilization is the minimum (0th percentile) of
+// this distribution. (However, if only the minimum is desired, it's
+// more efficient to use the MMU method.)
+func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
+	if len(quantiles) == 0 {
+		return []float64{}
+	}
+
+	// Each unrefined band contributes a known total mass to the
+	// distribution (bandDur except at the end), but in an unknown
+	// way. However, we know that all the mass it contributes must
+	// be at or above its worst-case mean mutator utilization.
+	//
+	// Hence, we refine bands until the highest desired
+	// distribution quantile is less than the next worst-case mean
+	// mutator utilization. At this point, all further
+	// contributions to the distribution must be beyond the
+	// desired quantile and hence cannot affect it.
+	//
+	// First, find the highest desired distribution quantile.
+	maxQ := quantiles[0]
+	for _, q := range quantiles {
+		if q > maxQ {
+			maxQ = q
+		}
+	}
+	// The distribution's mass is in units of time (it's not
+	// normalized because this would make it more annoying to
+	// account for future contributions of unrefined bands). The
+	// total final mass will be the duration of the trace itself
+	// minus the window size. Using this, we can compute the mass
+	// corresponding to quantile maxQ.
+	var duration int64
+	for _, s := range c.series {
+		duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
+		if duration1 >= int64(window) { // series shorter than the window contribute no mass
+			duration += duration1 - int64(window)
+		}
+	}
+	qMass := float64(duration) * maxQ
+
+	// Accumulate the MUD until we have precise information for
+	// everything to the left of qMass.
+	acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
+	acc.mud.setTrackMass(qMass)
+	c.mmu(window, &acc)
+
+	// Evaluate the quantiles on the accumulated MUD.
+	out := make([]float64, len(quantiles))
+	for i := range out {
+		mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
+		if math.IsNaN(mu) {
+			// There are a few legitimate ways this can
+			// happen:
+			//
+			// 1. If the window is the full trace
+			// duration, then the windowed MU function is
+			// only defined at a single point, so the MU
+			// distribution is not well-defined.
+			//
+			// 2. If there are no events, then the MU
+			// distribution has no mass.
+			//
+			// Either way, all of the quantiles will have
+			// converged toward the MMU at this point.
+			mu = acc.mmu
+		}
+		out[i] = mu
+	}
+	return out
+}
+
+// mmu feeds acc with the windowed mutator utilization of every series,
+// visiting bands from the lowest utilization bound upward and stopping
+// once no remaining band can change what acc has accumulated.
+func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
+	if window <= 0 {
+		acc.mmu = 0
+		return
+	}
+
+	// Per series, clamp the window to the span the series covers and
+	// collect the utilization bound of each of its bands.
+	windows := make([]time.Duration, len(c.series))
+	var pending bandUtilHeap
+	for i := range c.series {
+		s := &c.series[i]
+		windows[i] = window
+		if span := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > span {
+			windows[i] = span
+		}
+		pending = append(pending, s.mkBandUtil(i, windows[i])...)
+	}
+	heap.Init(&pending)
+
+	// Refine bands into precise windows, lowest bound first, for as
+	// long as the lowest remaining bound is still below acc.bound.
+	for len(pending) > 0 {
+		next := pending[0]
+		if !(next.utilBound < acc.bound) {
+			break
+		}
+		c.series[next.series].bandMMU(next.i, windows[next.series], acc)
+		heap.Pop(&pending)
+	}
+}
+
+func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
+	// For each band, compute the worst-possible total mutator
+	// utilization for all windows that start in that band.
+
+	// minBands is the minimum number of bands a window can span
+	// and maxBands is the maximum number of bands a window can
+	// span in any alignment.
+	minBands := int((int64(window) + c.bandDur - 1) / c.bandDur) // window / bandDur, rounded up
+	maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
+	if window > 1 && maxBands < 2 {
+		panic("maxBands < 2")
+	}
+	tailDur := int64(window) % c.bandDur // part of the window left over after whole bands
+	nUtil := len(c.bands) - maxBands + 1
+	if nUtil < 0 {
+		nUtil = 0
+	}
+	bandU := make([]bandUtil, nUtil) // non-nil even when nUtil is 0
+	for i := range bandU {
+		// To compute the worst-case MU, we assume the minimum
+		// for any bands that are only partially overlapped by
+		// some window and the mean for any bands that are
+		// completely covered by all windows.
+		var util totalUtil
+
+		// Find the lowest and second lowest of the partial
+		// bands.
+		l := c.bands[i].minUtil
+		r1 := c.bands[i+minBands-1].minUtil
+		r2 := c.bands[i+maxBands-1].minUtil
+		minBand := math.Min(l, math.Min(r1, r2))
+		// Assume the worst window maximally overlaps the
+		// worst minimum and then the rest overlaps the second
+		// worst minimum.
+		if minBands == 1 {
+			util += totalUtilOf(minBand, int64(window))
+		} else {
+			util += totalUtilOf(minBand, c.bandDur)
+			midBand := 0.0
+			switch { // the first matching case wins when several of l, r1, r2 equal minBand
+			case minBand == l:
+				midBand = math.Min(r1, r2)
+			case minBand == r1:
+				midBand = math.Min(l, r2)
+			case minBand == r2:
+				midBand = math.Min(l, r1)
+			}
+			util += totalUtilOf(midBand, tailDur)
+		}
+
+		// Add the total mean MU of bands that are completely
+		// overlapped by all windows.
+		if minBands > 2 {
+			util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
+		}
+
+		bandU[i] = bandUtil{series, i, util.mean(window)}
+	}
+
+	return bandU
+}
+
+// bandMMU computes the precise minimum mutator utilization for
+// windows with a left edge in band bandIdx, reporting each evaluated point to acc.addMU.
+func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
+	util := c.util
+
+	// We think of the mutator utilization over time as the
+	// box-filtered utilization function, which we call the
+	// "windowed mutator utilization function". The resulting
+	// function is continuous and piecewise linear (unless
+	// window==0, which we handle elsewhere), where the boundaries
+	// between segments occur when either edge of the window
+	// encounters a change in the instantaneous mutator
+	// utilization function. Hence, the minimum of this function
+	// will always occur when one of the edges of the window
+	// aligns with a utilization change, so these are the only
+	// points we need to consider.
+	//
+	// We compute the mutator utilization function incrementally
+	// by tracking the integral from t=0 to the left edge of the
+	// window and to the right edge of the window.
+	left := c.bands[bandIdx].integrator
+	right := left // a copy, so the two edges advance independently
+	time, endTime := c.bandTime(bandIdx)
+	if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime { // keep the right edge inside the series
+		endTime = utilEnd
+	}
+	acc.resetTime()
+	for {
+		// Advance edges to time and time+window.
+		mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
+		if acc.addMU(time, mu, window) {
+			break
+		}
+		if time == endTime {
+			break
+		}
+
+		// The maximum slope of the windowed mutator
+		// utilization function is 1/window, so we can always
+		// advance the time by at least (mu - acc.bound) * window
+		// without dropping below acc.bound.
+		minTime := time + int64((mu-acc.bound)*float64(window))
+
+		// Advance the window to the next time where either
+		// the left or right edge of the window encounters a
+		// change in the utilization curve.
+		if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
+			time = t1
+		} else {
+			time = t2
+		}
+		if time < minTime {
+			time = minTime
+		}
+		if time >= endTime {
+			// For MMUs we could stop here, but for MUDs
+			// it's important that we span the entire
+			// band.
+			time = endTime
+		}
+	}
+}
+
+// An integrator tracks a position in a utilization function and
+// integrates it.
+type integrator struct {
+	u *mmuSeries // the series whose util and sums are read
+	// pos is the index in u.util of the current time's non-strict
+	// predecessor.
+	pos int
+}
+
+// advance returns the integral of the utilization function from 0 to
+// time. advance must be called on monotonically increasing values of
+// time.
+func (in *integrator) advance(time int64) totalUtil {
+	util, pos := in.u.util, in.pos
+	// Advance pos until pos+1 is time's strict successor (making
+	// pos time's non-strict predecessor).
+	//
+	// Very often, this will be nearby, so we optimize that case,
+	// but it may be arbitrarily far away, so we handle that
+	// efficiently, too.
+	const maxSeq = 8
+	if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
+		// Nearby. Use a linear scan.
+		for pos+1 < len(util) && util[pos+1].Time <= time {
+			pos++
+		}
+	} else {
+		// Far. Binary search for time's strict successor.
+		l, r := pos, len(util)
+		for l < r {
+			h := int(uint(l+r) >> 1) // midpoint; the uint conversion guards against overflow of l+r
+			if util[h].Time <= time {
+				l = h + 1
+			} else {
+				r = h
+			}
+		}
+		pos = l - 1 // Non-strict predecessor.
+	}
+	in.pos = pos
+	var partial totalUtil
+	if time != util[pos].Time {
+		partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
+	}
+	return in.u.sums[pos] + partial
+}
+
+// next returns the smallest time t' > time of a utilization change at or after in.pos, or math.MaxInt64 if none.
+func (in *integrator) next(time int64) int64 {
+	rest := in.u.util[in.pos:]
+	for i := range rest {
+		if rest[i].Time > time {
+			return rest[i].Time
+		}
+	}
+	return math.MaxInt64
+}
diff --git a/src/internal/trace/gc_test.go b/src/internal/trace/gc_test.go
new file mode 100644
index 0000000..9b9771e
--- /dev/null
+++ b/src/internal/trace/gc_test.go
@@ -0,0 +1,202 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"bytes"
+	"math"
+	"os"
+	"testing"
+	"time"
+)
+
+// aeq reports whether x and y are equal to within a relative tolerance
+// of 1e-7, i.e. about 8 significant digits (1 part in 10 million).
+func aeq(x, y float64) bool {
+	const digits = 8
+	slack := 1 - math.Pow(10, 1-digits)
+	if x < 0 && y < 0 {
+		x, y = -x, -y
+	}
+	return x*slack <= y && y*slack <= x
+}
+
+func TestMMU(t *testing.T) {
+	t.Parallel()
+
+	// MU (a square wave alternating between 1 and 0 every second)
+	// 1.0  *****   *****   *****
+	// 0.5      *   *   *   *
+	// 0.0      *****   *****
+	//      0   1   2   3   4   5
+	util := [][]MutatorUtil{{
+		{0e9, 1},
+		{1e9, 0},
+		{2e9, 1},
+		{3e9, 0},
+		{4e9, 1},
+		{5e9, 0},
+	}}
+	mmuCurve := NewMMUCurve(util)
+
+	for _, test := range []struct {
+		window time.Duration // window size passed to MMU and Examples
+		want   float64       // expected MMU
+		worst  []float64     // expected utilizations of the (at most 2) windows from Examples, in returned order
+	}{
+		{0, 0, []float64{}},
+		{time.Millisecond, 0, []float64{0, 0}},
+		{time.Second, 0, []float64{0, 0}},
+		{2 * time.Second, 0.5, []float64{0.5, 0.5}},
+		{3 * time.Second, 1 / 3.0, []float64{1 / 3.0}},
+		{4 * time.Second, 0.5, []float64{0.5}},
+		{5 * time.Second, 3 / 5.0, []float64{3 / 5.0}},
+		{6 * time.Second, 3 / 5.0, []float64{3 / 5.0}},
+	} {
+		if got := mmuCurve.MMU(test.window); !aeq(test.want, got) {
+			t.Errorf("for %s window, want mu = %f, got %f", test.window, test.want, got)
+		}
+		worst := mmuCurve.Examples(test.window, 2)
+		// Which exact windows are returned is unspecified
+		// (and depends on the exact banding), so we just
+		// check that we got the right number with the right
+		// utilizations.
+		if len(worst) != len(test.worst) {
+			t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst)
+		} else {
+			for i := range worst {
+				if worst[i].MutatorUtil != test.worst[i] { // exact comparison, unlike the aeq check on MMU above
+					t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst)
+					break
+				}
+			}
+		}
+	}
+}
+
+func TestMMUTrace(t *testing.T) {
+	// Can't be t.Parallel() because it modifies the
+	// bandsPerSeries package variable.
+	if testing.Short() {
+		// test input too big for all.bash
+		t.Skip("skipping in -short mode")
+	}
+
+	data, err := os.ReadFile("testdata/stress_1_10_good")
+	if err != nil {
+		t.Fatalf("failed to read input file: %v", err)
+	}
+	_, events, err := parse(bytes.NewReader(data), "")
+	if err != nil {
+		t.Fatalf("failed to parse trace: %s", err)
+	}
+	mu := MutatorUtilization(events.Events, UtilSTW|UtilBackground|UtilAssist)
+	mmuCurve := NewMMUCurve(mu)
+
+	// Test the optimized implementation against the "obviously
+	// correct" implementation.
+	for window := time.Nanosecond; window < 10*time.Second; window *= 10 {
+		want := mmuSlow(mu[0], window)
+		got := mmuCurve.MMU(window)
+		if !aeq(want, got) {
+			t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window)
+		}
+	}
+
+	// Test MUD with band optimization against MUD without band
+	// optimization. We don't have a simple testing implementation
+	// of MUDs (the simplest implementation is still quite
+	// complex), but this is still a pretty good test.
+	defer func(old int) { bandsPerSeries = old }(bandsPerSeries) // restore the package default when the test ends
+	bandsPerSeries = 1
+	mmuCurve2 := NewMMUCurve(mu)
+	quantiles := []float64{0, 1 - .999, 1 - .99}
+	for window := time.Microsecond; window < time.Second; window *= 10 {
+		mud1 := mmuCurve.MUD(window, quantiles)
+		mud2 := mmuCurve2.MUD(window, quantiles)
+		for i := range mud1 {
+			if !aeq(mud1[i], mud2[i]) {
+				t.Errorf("for quantiles %v at window %v, want %v, got %v", quantiles, window, mud2, mud1)
+				break
+			}
+		}
+	}
+}
+
+// BenchmarkMMU times building an MMU curve from the stress trace and
+// sampling it at 100 log-spaced window sizes from 1 microsecond to 1 second.
+func BenchmarkMMU(b *testing.B) {
+	raw, err := os.ReadFile("testdata/stress_1_10_good")
+	if err != nil {
+		b.Fatalf("failed to read input file: %v", err)
+	}
+	_, res, err := parse(bytes.NewReader(raw), "")
+	if err != nil {
+		b.Fatalf("failed to parse trace: %s", err)
+	}
+	mu := MutatorUtilization(res.Events, UtilSTW|UtilBackground|UtilAssist|UtilSweep)
+	const samples = 100
+	logMin, logMax := math.Log(float64(time.Microsecond)), math.Log(float64(time.Second))
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		curve := NewMMUCurve(mu)
+		for j := 0; j < samples; j++ {
+			window := time.Duration(math.Exp(float64(j)/(samples-1)*(logMax-logMin) + logMin))
+			curve.MMU(window)
+		}
+	}
+}
+
+func mmuSlow(util []MutatorUtil, window time.Duration) (mmu float64) { // simple reference MMU that TestMMUTrace compares MMUCurve.MMU against
+	if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max {
+		window = max
+	}
+
+	mmu = 1.0
+
+	// muInWindow returns the mean mutator utilization between
+	// util[0].Time and end.
+	muInWindow := func(util []MutatorUtil, end int64) float64 {
+		total := 0.0
+		var prevU MutatorUtil
+		for _, u := range util {
+			if u.Time > end {
+				total += prevU.Util * float64(end-prevU.Time)
+				break
+			}
+			total += prevU.Util * float64(u.Time-prevU.Time)
+			prevU = u
+		}
+		return total / float64(end-util[0].Time)
+	}
+	update := func() { // reads the current value of util, so it sees the reversed trace on the second call
+		for i, u := range util {
+			if u.Time+int64(window) > util[len(util)-1].Time {
+				break
+			}
+			mmu = math.Min(mmu, muInWindow(util[i:], u.Time+int64(window)))
+		}
+	}
+
+	// Consider all left-aligned windows.
+	update()
+	// Reverse the trace. Slightly subtle because each MutatorUtil
+	// is a *change*.
+	rutil := make([]MutatorUtil, len(util))
+	if util[len(util)-1].Util != 0 {
+		panic("irreversible trace")
+	}
+	for i, u := range util {
+		util1 := 0.0
+		if i != 0 {
+			util1 = util[i-1].Util
+		}
+		rutil[len(rutil)-i-1] = MutatorUtil{Time: -u.Time, Util: util1} // negated times keep the reversed series time-ordered
+	}
+	util = rutil
+	// Consider all right-aligned windows.
+	update()
+	return
+}
diff --git a/src/internal/trace/goroutines.go b/src/internal/trace/goroutines.go
new file mode 100644
index 0000000..a5fda48
--- /dev/null
+++ b/src/internal/trace/goroutines.go
@@ -0,0 +1,338 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import "sort"
+
+// GDesc contains statistics and execution details of a single goroutine.
+type GDesc struct {
+	ID           uint64 // goroutine ID (Args[0] of its EvGoCreate event)
+	Name         string // function name of the first stack frame seen when the goroutine starts
+	PC           uint64 // PC of that same first stack frame
+	CreationTime int64  // timestamp of the EvGoCreate event
+	StartTime    int64  // timestamp of the first start event; 0 if it never started
+	EndTime      int64  // timestamp of the EvGoEnd/EvGoStop event; 0 if none was seen
+
+	// List of regions in the goroutine, sorted based on the start time.
+	Regions []*UserRegionDesc
+
+	// Statistics of execution time during the goroutine execution.
+	GExecutionStat
+
+	// gdesc holds in-progress analysis state. GoroutineStats sets it
+	// to nil before returning.
+	*gdesc // private part.
+}
+
+// UserRegionDesc represents a region and goroutine execution stats
+// while the region was active.
+type UserRegionDesc struct {
+	TaskID uint64 // ID of the task the region belongs to (Args[0] of the EvUserRegion event)
+	Name   string // region name (SArgs[0] of the EvUserRegion event); not set for synthetic regions
+
+	// Region start event. Normally EvUserRegion start event or nil,
+	// but can be EvGoCreate event if the region is a synthetic
+	// region representing task inheritance from the parent goroutine.
+	Start *Event
+
+	// Region end event. Normally EvUserRegion end event or nil,
+	// but can be EvGoStop or EvGoEnd event if the goroutine
+	// terminated without explicitly ending the region.
+	End *Event
+
+	// While the region is active this holds the goroutine's stats
+	// snapshot taken at region start; once the region ends it holds
+	// the difference between the end snapshot and that start snapshot.
+	GExecutionStat
+}
+
+// GExecutionStat contains statistics about a goroutine's execution
+// during a period of time. All fields are durations expressed in the
+// same unit as event timestamps.
+type GExecutionStat struct {
+	ExecTime      int64
+	SchedWaitTime int64
+	IOTime        int64
+	BlockTime     int64
+	SyscallTime   int64
+	GCTime        int64
+	SweepTime     int64
+	TotalTime     int64
+}
+
+// sub returns the field-wise difference s-v, that is, the stats
+// accumulated between the snapshot v and the later snapshot s.
+// Neither operand is modified.
+func (s GExecutionStat) sub(v GExecutionStat) (r GExecutionStat) {
+	return GExecutionStat{
+		ExecTime:      s.ExecTime - v.ExecTime,
+		SchedWaitTime: s.SchedWaitTime - v.SchedWaitTime,
+		IOTime:        s.IOTime - v.IOTime,
+		BlockTime:     s.BlockTime - v.BlockTime,
+		SyscallTime:   s.SyscallTime - v.SyscallTime,
+		GCTime:        s.GCTime - v.GCTime,
+		SweepTime:     s.SweepTime - v.SweepTime,
+		TotalTime:     s.TotalTime - v.TotalTime,
+	}
+}
+
+// snapshotStat returns the goroutine's execution statistics as of
+// lastTs without modifying g. It is called while walking the ordered
+// event stream, so intervals that are still open (the goroutine is
+// currently running, blocked, waiting to be scheduled, sweeping, or a
+// GC is in progress) are charged up to lastTs. activeGCStartTime is
+// the start time of the GC in progress, or 0 if there is none.
+func (g *GDesc) snapshotStat(lastTs, activeGCStartTime int64) (ret GExecutionStat) {
+	ret = g.GExecutionStat
+	if g.gdesc == nil {
+		// Already finalized: there is no pending state to add.
+		return ret
+	}
+
+	// sinceStart is the time elapsed since start, where a zero start
+	// means "interval not open" and contributes nothing.
+	sinceStart := func(start int64) int64 {
+		if start == 0 {
+			return 0
+		}
+		return lastTs - start
+	}
+
+	if activeGCStartTime != 0 {
+		// A GC is in progress. Charge only the part of it that
+		// overlaps the goroutine's lifetime.
+		gcFrom := activeGCStartTime
+		if g.CreationTime >= activeGCStartTime {
+			gcFrom = g.CreationTime
+		}
+		ret.GCTime += lastTs - gcFrom
+	}
+
+	if g.TotalTime == 0 {
+		ret.TotalTime = lastTs - g.CreationTime
+	}
+
+	ret.ExecTime += sinceStart(g.lastStartTime)
+	ret.IOTime += sinceStart(g.blockNetTime)
+	ret.BlockTime += sinceStart(g.blockSyncTime)
+	ret.SyscallTime += sinceStart(g.blockSyscallTime)
+	ret.SchedWaitTime += sinceStart(g.blockSchedTime)
+	ret.SweepTime += sinceStart(g.blockSweepTime)
+	return ret
+}
+
+// finalize closes out the goroutine's statistics as of lastTs. It is
+// called either when a goroutine end event is processed, with that
+// event as trigger, or at the end of trace processing, with a nil
+// trigger. Every region still active is ended with trigger as its End
+// event and moved to g.Regions, and the private analysis state is
+// reset to its zero value.
+func (g *GDesc) finalize(lastTs, activeGCStartTime int64, trigger *Event) {
+	if trigger != nil {
+		g.EndTime = trigger.Ts
+	}
+
+	final := g.snapshotStat(lastTs, activeGCStartTime)
+	g.GExecutionStat = final
+
+	for _, region := range g.activeRegions {
+		// region.GExecutionStat holds the snapshot taken at region
+		// start; replace it with the stats accrued since then.
+		region.GExecutionStat = final.sub(region.GExecutionStat)
+		region.End = trigger
+		g.Regions = append(g.Regions, region)
+	}
+
+	*g.gdesc = gdesc{}
+}
+
+// gdesc is a private part of GDesc that is required only during analysis.
+//
+// Each *Time field records the timestamp at which the goroutine
+// entered the corresponding state, and is 0 while the goroutine is
+// not in that state.
+type gdesc struct {
+	lastStartTime    int64 // set on start events; cleared when the goroutine stops running
+	blockNetTime     int64 // set on EvGoBlockNet; cleared on EvGoUnblock
+	blockSyncTime    int64 // set on channel/select/sync/cond block events; cleared on EvGoUnblock
+	blockSyscallTime int64 // set on EvGoSysBlock; cleared on EvGoSysExit
+	blockSweepTime   int64 // set on EvGCSweepStart; cleared on EvGCSweepDone
+	blockGCTime      int64 // set on EvGoBlockGC; not read by the code in this file
+	blockSchedTime   int64 // set when the goroutine becomes runnable; cleared when it starts
+
+	activeRegions []*UserRegionDesc // stack of active regions
+}
+
+// GoroutineStats generates statistics for all goroutines in the trace.
+//
+// events must be the ordered event stream. The result maps goroutine
+// ID to its descriptor; an entry is created for every EvGoCreate
+// event. Goroutines that have not ended by the last event are
+// finalized at the timestamp of that last event. Each returned GDesc
+// has its Regions sorted by start time and its private analysis state
+// removed.
+//
+// Most goroutine events assume that the goroutine's EvGoCreate event
+// appeared earlier in events.
+func GoroutineStats(events []*Event) map[uint64]*GDesc {
+	gs := make(map[uint64]*GDesc)
+	var lastTs int64
+	var gcStartTime int64 // gcStartTime == 0 indicates gc is inactive.
+	for _, ev := range events {
+		lastTs = ev.Ts
+		switch ev.Type {
+		case EvGoCreate:
+			g := &GDesc{ID: ev.Args[0], CreationTime: ev.Ts, gdesc: new(gdesc)}
+			// A new goroutine is runnable and waits to be scheduled.
+			g.blockSchedTime = ev.Ts
+			// When a goroutine is newly created, inherit the
+			// task of the active region. For ease handling of
+			// this case, we create a fake region description with
+			// the task id.
+			if creatorG := gs[ev.G]; creatorG != nil && len(creatorG.gdesc.activeRegions) > 0 {
+				regions := creatorG.gdesc.activeRegions
+				s := regions[len(regions)-1]
+				if s.TaskID != 0 {
+					g.gdesc.activeRegions = []*UserRegionDesc{
+						{TaskID: s.TaskID, Start: ev},
+					}
+				}
+			}
+			gs[g.ID] = g
+		case EvGoStart, EvGoStartLabel:
+			g := gs[ev.G]
+			// Name the goroutine after the first frame of the first
+			// start event that carries a stack. The stack may be
+			// empty, so it must not be indexed unconditionally.
+			if g.PC == 0 && len(ev.Stk) > 0 {
+				g.PC = ev.Stk[0].PC
+				g.Name = ev.Stk[0].Fn
+			}
+			g.lastStartTime = ev.Ts
+			if g.StartTime == 0 {
+				g.StartTime = ev.Ts
+			}
+			if g.blockSchedTime != 0 {
+				g.SchedWaitTime += ev.Ts - g.blockSchedTime
+				g.blockSchedTime = 0
+			}
+		case EvGoEnd, EvGoStop:
+			g := gs[ev.G]
+			g.finalize(ev.Ts, gcStartTime, ev)
+		case EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+			EvGoBlockSync, EvGoBlockCond:
+			g := gs[ev.G]
+			g.ExecTime += ev.Ts - g.lastStartTime
+			g.lastStartTime = 0
+			g.blockSyncTime = ev.Ts
+		case EvGoSched, EvGoPreempt:
+			// The goroutine stops running but stays runnable.
+			g := gs[ev.G]
+			g.ExecTime += ev.Ts - g.lastStartTime
+			g.lastStartTime = 0
+			g.blockSchedTime = ev.Ts
+		case EvGoSleep, EvGoBlock:
+			// Only the running interval is closed; the time spent
+			// blocked is not charged to any category.
+			g := gs[ev.G]
+			g.ExecTime += ev.Ts - g.lastStartTime
+			g.lastStartTime = 0
+		case EvGoBlockNet:
+			g := gs[ev.G]
+			g.ExecTime += ev.Ts - g.lastStartTime
+			g.lastStartTime = 0
+			g.blockNetTime = ev.Ts
+		case EvGoBlockGC:
+			g := gs[ev.G]
+			g.ExecTime += ev.Ts - g.lastStartTime
+			g.lastStartTime = 0
+			g.blockGCTime = ev.Ts
+		case EvGoUnblock:
+			// Args[0] is the goroutine being unblocked; ev.G is the
+			// goroutine doing the unblocking.
+			g := gs[ev.Args[0]]
+			if g.blockNetTime != 0 {
+				g.IOTime += ev.Ts - g.blockNetTime
+				g.blockNetTime = 0
+			}
+			if g.blockSyncTime != 0 {
+				g.BlockTime += ev.Ts - g.blockSyncTime
+				g.blockSyncTime = 0
+			}
+			g.blockSchedTime = ev.Ts
+		case EvGoSysBlock:
+			g := gs[ev.G]
+			g.ExecTime += ev.Ts - g.lastStartTime
+			g.lastStartTime = 0
+			g.blockSyscallTime = ev.Ts
+		case EvGoSysExit:
+			g := gs[ev.G]
+			if g.blockSyscallTime != 0 {
+				g.SyscallTime += ev.Ts - g.blockSyscallTime
+				g.blockSyscallTime = 0
+			}
+			g.blockSchedTime = ev.Ts
+		case EvGCSweepStart:
+			g := gs[ev.G]
+			if g != nil {
+				// Sweep can happen during GC on system goroutine.
+				g.blockSweepTime = ev.Ts
+			}
+		case EvGCSweepDone:
+			g := gs[ev.G]
+			if g != nil && g.blockSweepTime != 0 {
+				g.SweepTime += ev.Ts - g.blockSweepTime
+				g.blockSweepTime = 0
+			}
+		case EvGCStart:
+			gcStartTime = ev.Ts
+		case EvGCDone:
+			// Charge the finished GC to every goroutine that has not
+			// ended, counting only the part of the GC that overlaps
+			// the goroutine's lifetime.
+			for _, g := range gs {
+				if g.EndTime != 0 {
+					continue
+				}
+				if gcStartTime < g.CreationTime {
+					g.GCTime += ev.Ts - g.CreationTime
+				} else {
+					g.GCTime += ev.Ts - gcStartTime
+				}
+			}
+			gcStartTime = 0 // indicates gc is inactive.
+		case EvUserRegion:
+			g := gs[ev.G]
+			switch mode := ev.Args[1]; mode {
+			case 0: // region start
+				// Remember the stats at region start; they are
+				// subtracted from the stats at region end.
+				g.activeRegions = append(g.activeRegions, &UserRegionDesc{
+					Name:           ev.SArgs[0],
+					TaskID:         ev.Args[0],
+					Start:          ev,
+					GExecutionStat: g.snapshotStat(lastTs, gcStartTime),
+				})
+			case 1: // region end
+				var sd *UserRegionDesc
+				if regionStk := g.activeRegions; len(regionStk) > 0 {
+					n := len(regionStk)
+					sd = regionStk[n-1]
+					regionStk = regionStk[:n-1] // pop
+					g.activeRegions = regionStk
+				} else {
+					// End without a recorded start: describe the
+					// region with a nil Start and zero start stats.
+					sd = &UserRegionDesc{
+						Name:   ev.SArgs[0],
+						TaskID: ev.Args[0],
+					}
+				}
+				sd.GExecutionStat = g.snapshotStat(lastTs, gcStartTime).sub(sd.GExecutionStat)
+				sd.End = ev
+				g.Regions = append(g.Regions, sd)
+			}
+		}
+	}
+
+	for _, g := range gs {
+		g.finalize(lastTs, gcStartTime, nil)
+
+		// sort based on region start time
+		sort.Slice(g.Regions, func(i, j int) bool {
+			x := g.Regions[i].Start
+			y := g.Regions[j].Start
+			// Regions without a start event sort first.
+			if x == nil {
+				return true
+			}
+			if y == nil {
+				return false
+			}
+			return x.Ts < y.Ts
+		})
+
+		// Drop the analysis-only state from the returned descriptor.
+		g.gdesc = nil
+	}
+
+	return gs
+}
+
+// RelatedGoroutines returns the set of goroutines related to goroutine
+// goid: goid itself, the goroutines that unblock it, and the
+// goroutines that unblock those. Goroutine 0 is always included so
+// that GC events are covered.
+func RelatedGoroutines(events []*Event, goid uint64) map[uint64]bool {
+	// BFS of depth 2 over "unblock" edges, walked backwards from goid.
+	related := map[uint64]bool{goid: true}
+	for depth := 0; depth < 2; depth++ {
+		// Grow a copy so that membership tests during this round
+		// only see the set from the previous round.
+		grown := make(map[uint64]bool, len(related))
+		for id := range related {
+			grown[id] = true
+		}
+		for _, ev := range events {
+			if ev.Type != EvGoUnblock {
+				continue
+			}
+			if related[ev.Args[0]] {
+				grown[ev.G] = true
+			}
+		}
+		related = grown
+	}
+	related[0] = true // for GC events
+	return related
+}
diff --git a/src/internal/trace/mkcanned.bash b/src/internal/trace/mkcanned.bash
new file mode 100755
index 0000000..b365b90
--- /dev/null
+++ b/src/internal/trace/mkcanned.bash
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Copyright 2016 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# mkcanned.bash creates canned traces for the trace test suite using
+# the current Go version.
+
+set -e
+
+if [ $# != 1 ]; then
+    echo "usage: $0 <label>" >&2
+    exit 1
+fi
+
+go test -run ClientServerParallel4 -trace "testdata/http_$1_good" net/http
+go test -run 'TraceStress$|TraceStressStartStop$|TestUserTaskSpan$' runtime/trace -savetraces
+mv ../../runtime/trace/TestTraceStress.trace "testdata/stress_$1_good"
+mv ../../runtime/trace/TestTraceStressStartStop.trace "testdata/stress_start_stop_$1_good"
+mv ../../runtime/trace/TestUserTaskSpan.trace "testdata/user_task_span_$1_good"
diff --git a/src/internal/trace/mud.go b/src/internal/trace/mud.go
new file mode 100644
index 0000000..8826306
--- /dev/null
+++ b/src/internal/trace/mud.go
@@ -0,0 +1,223 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"math"
+	"sort"
+)
+
+// mud is an updatable mutator utilization distribution.
+//
+// This is a continuous distribution of duration over mutator
+// utilization. For example, the integral from mutator utilization a
+// to b is the total duration during which the mutator utilization was
+// in the range [a, b].
+//
+// This distribution is *not* normalized (it is not a probability
+// distribution). This makes it easier to work with as it's being
+// updated.
+//
+// It is represented as the sum of scaled uniform distribution
+// functions and Dirac delta functions (which are treated as
+// degenerate uniform distributions).
+type mud struct {
+	// unsorted collects the edges appended by add. invCumulativeSum
+	// sorts them by x and merges them into sorted.
+	sorted, unsorted []edge
+
+	// trackMass is the inverse cumulative sum to track as the
+	// distribution is updated.
+	trackMass float64
+	// trackBucket is the bucket in which trackMass falls. If the
+	// total mass of the distribution is < trackMass, this is
+	// len(hist).
+	trackBucket int
+	// trackSum is the cumulative sum of hist[:trackBucket]. Once
+	// trackSum >= trackMass, trackBucket must be recomputed.
+	trackSum float64
+
+	// hist is a hierarchical histogram of distribution mass.
+	hist [mudDegree]float64
+}
+
+const (
+	// mudDegree is the number of buckets in the MUD summary
+	// histogram. add places a value x in bucket int(x*mudDegree),
+	// clamped to the last bucket.
+	mudDegree = 1024
+)
+
+// edge is one point at which the distribution function changes.
+type edge struct {
+	// At x, the function increases by delta.
+	x, delta float64
+	// Additionally at x is a Dirac delta function with area dirac.
+	dirac float64
+}
+
+// add adds a uniform function over [l, r] scaled so the total weight
+// of the uniform is area. If l==r, this adds a Dirac delta function.
+//
+// l and r may be given in either order. A zero area is a no-op.
+// Besides recording the edges, add keeps the summary histogram and
+// the tracked-mass state up to date.
+func (d *mud) add(l, r, area float64) {
+	if area == 0 {
+		return
+	}
+
+	if r < l {
+		l, r = r, l
+	}
+
+	// Add the edges.
+	if l == r {
+		d.unsorted = append(d.unsorted, edge{l, 0, area})
+	} else {
+		// The function steps up by delta at l and back down at r.
+		delta := area / (r - l)
+		d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0})
+	}
+
+	// Update the histogram.
+	h := &d.hist
+	// lb is the bucket containing l and lf is how far into that
+	// bucket l falls, as a fraction in [0, 1).
+	lbFloat, lf := math.Modf(l * mudDegree)
+	lb := int(lbFloat)
+	if lb >= mudDegree {
+		// Clamp to the very end of the last bucket.
+		lb, lf = mudDegree-1, 1
+	}
+	if l == r {
+		h[lb] += area
+	} else {
+		rbFloat, rf := math.Modf(r * mudDegree)
+		rb := int(rbFloat)
+		if rb >= mudDegree {
+			rb, rf = mudDegree-1, 1
+		}
+		if lb == rb {
+			h[lb] += area
+		} else {
+			// perBucket is the mass a fully covered bucket receives.
+			// The first and last buckets are covered only partially.
+			perBucket := area / (r - l) / mudDegree
+			h[lb] += perBucket * (1 - lf)
+			h[rb] += perBucket * rf
+			for i := lb + 1; i < rb; i++ {
+				h[i] += perBucket
+			}
+		}
+	}
+
+	// Update mass tracking.
+	// Only mass that lands below the lower bound of the tracked
+	// bucket changes trackSum.
+	if thresh := float64(d.trackBucket) / mudDegree; l < thresh {
+		if r < thresh {
+			d.trackSum += area
+		} else {
+			// Only the part of the uniform in [l, thresh) counts.
+			d.trackSum += area * (thresh - l) / (r - l)
+		}
+		if d.trackSum >= d.trackMass {
+			// The tracked mass now falls in a different
+			// bucket. Recompute the inverse cumulative sum.
+			d.setTrackMass(d.trackMass)
+		}
+	}
+}
+
+// setTrackMass sets the mass to track the inverse cumulative sum for.
+//
+// Specifically, mass is a cumulative duration, and the mutator
+// utilization bounds for this duration can be queried using
+// approxInvCumulativeSum.
+//
+// If the histogram holds no more than mass in total, trackBucket is
+// set to len(d.hist).
+func (d *mud) setTrackMass(mass float64) {
+	d.trackMass = mass
+
+	// Walk the histogram, accumulating mass, until the running
+	// total first exceeds the tracked mass.
+	below := 0.0
+	for bucket := range d.hist {
+		through := below + d.hist[bucket]
+		if through > mass {
+			// mass falls in this bucket.
+			d.trackBucket, d.trackSum = bucket, below
+			return
+		}
+		below = through
+	}
+	d.trackBucket, d.trackSum = len(d.hist), below
+}
+
+// approxInvCumulativeSum is like invCumulativeSum, but specifically
+// operates on the tracked mass and returns a lower and an upper bound
+// approximation of the inverse cumulative sum, in that order.
+//
+// The true inverse cumulative sum will be in the range [lower, upper).
+// If the tracked mass does not fall in any bucket, both bounds are NaN
+// and the final result is false.
+func (d *mud) approxInvCumulativeSum() (float64, float64, bool) {
+	bucket := d.trackBucket
+	if bucket == len(d.hist) {
+		nan := math.NaN()
+		return nan, nan, false
+	}
+	lower := float64(bucket) / mudDegree
+	upper := float64(bucket+1) / mudDegree
+	return lower, upper, true
+}
+
+// invCumulativeSum returns x such that the integral of d from -∞ to x
+// is y. If the total weight of d is less than y, it returns the
+// maximum of the distribution and false. If d has no edges at all, it
+// returns NaN and false.
+//
+// Specifically, y is a cumulative duration, and invCumulativeSum
+// returns the mutator utilization x such that at least y time has
+// been spent with mutator utilization <= x.
+//
+// As a side effect, edges added since the previous call are sorted
+// and merged into d.sorted.
+func (d *mud) invCumulativeSum(y float64) (float64, bool) {
+	if len(d.sorted) == 0 && len(d.unsorted) == 0 {
+		return math.NaN(), false
+	}
+
+	// Fold any newly added edges into the sorted list. When nothing
+	// was added since the last query the sorted list is already up to
+	// date, so skip the sort and the reallocating merge entirely.
+	if edges := d.unsorted; len(edges) > 0 {
+		// Sort edges.
+		sort.Slice(edges, func(i, j int) bool {
+			return edges[i].x < edges[j].x
+		})
+		// Merge with sorted edges.
+		if d.sorted == nil {
+			d.sorted = edges
+		} else {
+			oldSorted := d.sorted
+			newSorted := make([]edge, len(oldSorted)+len(edges))
+			i, j := 0, 0
+			for o := range newSorted {
+				if i >= len(oldSorted) {
+					copy(newSorted[o:], edges[j:])
+					break
+				} else if j >= len(edges) {
+					copy(newSorted[o:], oldSorted[i:])
+					break
+				} else if oldSorted[i].x < edges[j].x {
+					newSorted[o] = oldSorted[i]
+					i++
+				} else {
+					newSorted[o] = edges[j]
+					j++
+				}
+			}
+			d.sorted = newSorted
+		}
+	}
+	d.unsorted = nil
+
+	// Traverse edges in order computing a cumulative sum. rate is the
+	// height of the function between prevX and the current edge.
+	csum, rate, prevX := 0.0, 0.0, 0.0
+	for _, e := range d.sorted {
+		newCsum := csum + (e.x-prevX)*rate
+		if newCsum >= y {
+			// y was exceeded between the previous edge
+			// and this one.
+			if rate == 0 {
+				// Anywhere between prevX and
+				// e.x will do. We return e.x
+				// because that takes care of
+				// the y==0 case naturally.
+				return e.x, true
+			}
+			return (y-csum)/rate + prevX, true
+		}
+		newCsum += e.dirac
+		if newCsum >= y {
+			// y was exceeded by the Dirac delta at e.x.
+			return e.x, true
+		}
+		csum, prevX = newCsum, e.x
+		rate += e.delta
+	}
+	return prevX, false
+}
diff --git a/src/internal/trace/mud_test.go b/src/internal/trace/mud_test.go
new file mode 100644
index 0000000..b3d74dc
--- /dev/null
+++ b/src/internal/trace/mud_test.go
@@ -0,0 +1,87 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"math/rand"
+	"testing"
+)
+
+// TestMUD inserts random uniforms (about one in ten degenerate, i.e. a
+// Dirac delta) and after every insertion checks that the histogram
+// holds the total mass added so far and that the exact inverse
+// cumulative sum lies within the bounds of the approximate one.
+func TestMUD(t *testing.T) {
+	rng := rand.New(rand.NewSource(42))
+	var dist mud
+	total := 0.0
+	for iter := 0; iter < 100; iter++ {
+		area := rng.Float64()
+		l := rng.Float64()
+		r := rng.Float64()
+		if rng.Intn(10) == 0 {
+			r = l
+		}
+		t.Log(l, r, area)
+		dist.add(l, r, area)
+		total += area
+
+		// Check total histogram weight.
+		histMass := 0.0
+		for b := range dist.hist {
+			histMass += dist.hist[b]
+		}
+		if !aeq(total, histMass) {
+			t.Fatalf("want mass %g, got %g", total, histMass)
+		}
+
+		// Check inverse cumulative sum approximations.
+		step := total * 0.099
+		for target := 0.0; target < total; target += step {
+			dist.setTrackMass(target)
+			lo, hi, okApprox := dist.approxInvCumulativeSum()
+			exact, okExact := dist.invCumulativeSum(target)
+			if !okApprox || !okExact {
+				t.Fatalf("inverse cumulative sum failed: approx %v, exact %v", okApprox, okExact)
+			}
+			if exact < lo || exact >= hi {
+				t.Fatalf("inverse(%g) = %g, not ∈ [%g, %g)", target, exact, lo, hi)
+			}
+		}
+	}
+}
+
+// TestMUDTracking checks that the tracked mass stays correct across
+// updates: for a range of tracked masses it adds random uniforms one
+// at a time and compares the approximate and exact inverse cumulative
+// sums after every addition.
+func TestMUDTracking(t *testing.T) {
+	rng := rand.New(rand.NewSource(42))
+	const uniforms = 100
+	for trackMass := 0.0; trackMass < uniforms; trackMass += uniforms / 50 {
+		var dist mud
+		total := 0.0
+		dist.setTrackMass(trackMass)
+		for n := 0; n < uniforms; n++ {
+			area := rng.Float64()
+			l := rng.Float64()
+			r := rng.Float64()
+			dist.add(l, r, area)
+			total += area
+			lo, hi, okApprox := dist.approxInvCumulativeSum()
+			exact, okExact := dist.invCumulativeSum(trackMass)
+
+			if total < trackMass {
+				// Not enough mass yet: both queries must fail.
+				if okApprox {
+					t.Errorf("approx(%g) = [%g, %g), but mass = %g", trackMass, lo, hi, total)
+				}
+				if okExact {
+					t.Errorf("exact(%g) = %g, but mass = %g", trackMass, exact, total)
+				}
+				continue
+			}
+
+			if !okApprox {
+				t.Errorf("approx(%g) failed, but mass = %g", trackMass, total)
+			}
+			if !okExact {
+				t.Errorf("exact(%g) failed, but mass = %g", trackMass, total)
+			}
+			if okApprox && okExact && (exact < lo || exact >= hi) {
+				t.Errorf("inverse(%g) = %g, not ∈ [%g, %g)", trackMass, exact, lo, hi)
+			}
+		}
+	}
+}
diff --git a/src/internal/trace/order.go b/src/internal/trace/order.go
new file mode 100644
index 0000000..36ed58d
--- /dev/null
+++ b/src/internal/trace/order.go
@@ -0,0 +1,279 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"fmt"
+	"sort"
+)
+
+// eventBatch is the not-yet-merged remainder of one per-P batch of
+// events, as used by order1007.
+type eventBatch struct {
+	events   []*Event // remaining events; events[0] is the next candidate
+	selected bool     // true while an event taken from this batch sits in the frontier
+}
+
+// orderEvent is a frontier entry in order1007: an event together with
+// the goroutine state transition it implies, as computed by
+// stateTransition.
+type orderEvent struct {
+	ev    *Event
+	batch int    // index of the batch the event was taken from
+	g     uint64 // goroutine whose state the event changes, or a pseudo ID (unordered, garbage)
+	init  gState // state g must be in for the event to be merged
+	next  gState // state g is in after the event
+}
+
+// gStatus is the scheduling status of a goroutine as tracked by the
+// event ordering code.
+type gStatus int
+
+// gState is the ordering state of a goroutine: a sequence number that
+// advances on certain transitions, plus the current status.
+type gState struct {
+	seq    uint64
+	status gStatus
+}
+
+const (
+	// Goroutine statuses. gDead is the zero value, so a goroutine
+	// that has no map entry yet reads as dead with seq 0.
+	gDead gStatus = iota
+	gRunnable
+	gRunning
+	gWaiting
+
+	// Pseudo goroutine IDs returned by stateTransition: unordered
+	// marks events with no ordering requirements, garbage is the ID
+	// under which EvGCStart events are sequenced.
+	unordered = ^uint64(0)
+	garbage   = ^uint64(0) - 1
+	// Pseudo sequence numbers: noseq in an init state means "any
+	// sequence number is acceptable" and in a next state means "keep
+	// the current one"; seqinc in a next state means "current
+	// sequence number plus one". They share numeric values with the
+	// pseudo IDs above but are only ever used as sequence numbers.
+	noseq  = ^uint64(0)
+	seqinc = ^uint64(0) - 1
+)
+
+// order1007 merges a set of per-P event batches into a single, consistent stream.
+// The high level idea is as follows. Events within an individual batch are in
+// correct order, because they are emitted by a single P. So we need to produce
+// a correct interleaving of the batches. To do this we take the first unmerged
+// event from each batch (the frontier). Then we choose the subset that is
+// "ready" to be merged, that is, events for which all dependencies are already
+// merged. Then we choose the event with the lowest timestamp from the subset,
+// merge it and repeat.
+// This approach ensures that we form a consistent stream even if timestamps are
+// incorrect (condition observed on some machines).
+//
+// It returns an error if no consistent ordering exists, ErrTimeOrder if the
+// resulting stream is not sorted by timestamp, and an error if an EvGoSysExit
+// with a real timestamp has no preceding syscall block on its goroutine.
+// The "Local" event types are rewritten in place to their regular
+// counterparts, and EvGoSysExit timestamps are replaced by the real syscall
+// exit time where one is recorded.
+func order1007(m map[int][]*Event) (events []*Event, err error) {
+	pending := 0
+	var batches []*eventBatch
+	for _, v := range m {
+		pending += len(v)
+		batches = append(batches, &eventBatch{v, false})
+	}
+	gs := make(map[uint64]gState)
+	var frontier []orderEvent
+	// Each iteration merges exactly one event into the output.
+	for ; pending != 0; pending-- {
+		// Refill the frontier: every batch that has no event in the
+		// frontier contributes its head event, provided it is ready.
+		for i, b := range batches {
+			if b.selected || len(b.events) == 0 {
+				continue
+			}
+			ev := b.events[0]
+			g, init, next := stateTransition(ev)
+			if !transitionReady(g, gs[g], init) {
+				continue
+			}
+			frontier = append(frontier, orderEvent{ev, i, g, init, next})
+			b.events = b.events[1:]
+			b.selected = true
+			// Get rid of "Local" events, they are intended merely for ordering.
+			switch ev.Type {
+			case EvGoStartLocal:
+				ev.Type = EvGoStart
+			case EvGoUnblockLocal:
+				ev.Type = EvGoUnblock
+			case EvGoSysExitLocal:
+				ev.Type = EvGoSysExit
+			}
+		}
+		if len(frontier) == 0 {
+			return nil, fmt.Errorf("no consistent ordering of events possible")
+		}
+		// Take the frontier event with the lowest timestamp and remove
+		// it by moving the last entry into its slot.
+		sort.Sort(orderEventList(frontier))
+		f := frontier[0]
+		frontier[0] = frontier[len(frontier)-1]
+		frontier = frontier[:len(frontier)-1]
+		events = append(events, f.ev)
+		transition(gs, f.g, f.init, f.next)
+		if !batches[f.batch].selected {
+			panic("frontier batch is not selected")
+		}
+		// The batch may contribute its next event on the next round.
+		batches[f.batch].selected = false
+	}
+
+	// At this point we have a consistent stream of events.
+	// Make sure time stamps respect the ordering.
+	// The tests will skip (not fail) the test case if they see this error.
+	if !sort.IsSorted(eventList(events)) {
+		return nil, ErrTimeOrder
+	}
+
+	// The last part is giving correct timestamps to EvGoSysExit events.
+	// The problem with EvGoSysExit is that actual syscall exit timestamp (ev.Args[2])
+	// is potentially acquired long before event emission. So far we've used
+	// timestamp of event emission (ev.Ts).
+	// We could not set ev.Ts = ev.Args[2] earlier, because it would produce
+	// seemingly broken timestamps (misplaced event).
+	// We also can't simply update the timestamp and resort events, because
+	// if timestamps are broken we will misplace the event and later report
+	// logically broken trace (instead of reporting broken timestamps).
+	lastSysBlock := make(map[uint64]int64)
+	for _, ev := range events {
+		switch ev.Type {
+		case EvGoSysBlock, EvGoInSyscall:
+			lastSysBlock[ev.G] = ev.Ts
+		case EvGoSysExit:
+			ts := int64(ev.Args[2])
+			if ts == 0 {
+				// No real exit timestamp recorded; keep ev.Ts.
+				continue
+			}
+			block := lastSysBlock[ev.G]
+			if block == 0 {
+				return nil, fmt.Errorf("stray syscall exit")
+			}
+			if ts < block {
+				// The exit cannot precede the block it ends.
+				return nil, ErrTimeOrder
+			}
+			ev.Ts = ts
+		}
+	}
+	// Stable, so events with equal timestamps keep their merged order.
+	sort.Stable(eventList(events))
+
+	return
+}
+
+// stateTransition maps an event to the goroutine state transition it
+// represents. It returns the goroutine (or pseudo goroutine) g whose
+// state is affected, the state init that g must be in for the event to
+// be ready for merging, and the state next that g is in after the
+// event. Events with no ordering requirements yield g == unordered and
+// zero states.
+func stateTransition(ev *Event) (g uint64, init, next gState) {
+	switch ev.Type {
+	case EvGoCreate:
+		return ev.Args[0], gState{0, gDead}, gState{1, gRunnable}
+	case EvGoWaiting, EvGoInSyscall:
+		return ev.G, gState{1, gRunnable}, gState{2, gWaiting}
+	case EvGoStart, EvGoStartLabel:
+		seq := ev.Args[1]
+		return ev.G, gState{seq, gRunnable}, gState{seq + 1, gRunning}
+	case EvGoStartLocal:
+		// noseq means that this event is ready for merging as soon as
+		// frontier reaches it (EvGoStartLocal is emitted on the same P
+		// as the corresponding EvGoCreate/EvGoUnblock, and thus the latter
+		// is already merged).
+		// seqinc is a stub for cases when event increments g sequence,
+		// but since we don't know current seq we also don't know next seq.
+		return ev.G, gState{noseq, gRunnable}, gState{seqinc, gRunning}
+	case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+		EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep,
+		EvGoSysBlock, EvGoBlockGC:
+		return ev.G, gState{noseq, gRunning}, gState{noseq, gWaiting}
+	case EvGoSched, EvGoPreempt:
+		return ev.G, gState{noseq, gRunning}, gState{noseq, gRunnable}
+	case EvGoUnblock, EvGoSysExit:
+		seq := ev.Args[1]
+		return ev.Args[0], gState{seq, gWaiting}, gState{seq + 1, gRunnable}
+	case EvGoUnblockLocal, EvGoSysExitLocal:
+		return ev.Args[0], gState{noseq, gWaiting}, gState{seqinc, gRunnable}
+	case EvGCStart:
+		seq := ev.Args[0]
+		return garbage, gState{seq, gDead}, gState{seq + 1, gDead}
+	default:
+		// no ordering requirements
+		return unordered, gState{}, gState{}
+	}
+}
+
+// transitionReady reports whether an event whose required initial
+// state is init can be merged while goroutine g is in state curr.
+// Events on the unordered pseudo goroutine are always ready; otherwise
+// the statuses must match and the sequence numbers must match unless
+// init.seq is noseq.
+func transitionReady(g uint64, curr, init gState) bool {
+	if g == unordered {
+		return true
+	}
+	if init.status != curr.status {
+		return false
+	}
+	return init.seq == noseq || init.seq == curr.seq
+}
+
+// transition records in gs that goroutine g moved from state init to
+// state next. The noseq and seqinc placeholders in next.seq are
+// resolved against g's current sequence number. It panics if g is not
+// currently in a state compatible with init. Events on the unordered
+// pseudo goroutine leave gs untouched.
+func transition(gs map[uint64]gState, g uint64, init, next gState) {
+	if g == unordered {
+		return
+	}
+	prev := gs[g]
+	if !transitionReady(g, prev, init) {
+		panic("event sequences are broken")
+	}
+	if next.seq == noseq {
+		// Sequence number is unchanged by this event.
+		next.seq = prev.seq
+	} else if next.seq == seqinc {
+		next.seq = prev.seq + 1
+	}
+	gs[g] = next
+}
+
+// order1005 merges a set of per-P event batches into a single, consistent stream.
+// Events are ordered by their sequence numbers. If the result is not
+// also ordered by timestamp, ErrTimeOrder is returned.
+func order1005(m map[int][]*Event) (events []*Event, err error) {
+	for _, perP := range m {
+		events = append(events, perP...)
+	}
+	for _, ev := range events {
+		if ev.Type != EvGoSysExit {
+			continue
+		}
+		// EvGoSysExit emission is delayed until the thread has a P.
+		// Give it the real sequence number and time stamp.
+		ev.seq = int64(ev.Args[1])
+		if realTs := ev.Args[2]; realTs != 0 {
+			ev.Ts = int64(realTs)
+		}
+	}
+	sort.Sort(eventSeqList(events))
+	if !sort.IsSorted(eventList(events)) {
+		return nil, ErrTimeOrder
+	}
+	return events, nil
+}
+
+// orderEventList implements sort.Interface, ordering frontier entries
+// by the timestamp of their event.
+type orderEventList []orderEvent
+
+func (l orderEventList) Len() int { return len(l) }
+
+func (l orderEventList) Less(i, j int) bool {
+	a, b := l[i].ev, l[j].ev
+	return a.Ts < b.Ts
+}
+
+func (l orderEventList) Swap(i, j int) { l[j], l[i] = l[i], l[j] }
+
+// eventList implements sort.Interface, ordering events by timestamp.
+type eventList []*Event
+
+func (l eventList) Len() int { return len(l) }
+
+func (l eventList) Less(i, j int) bool {
+	a, b := l[i], l[j]
+	return a.Ts < b.Ts
+}
+
+func (l eventList) Swap(i, j int) { l[j], l[i] = l[i], l[j] }
+
+// eventSeqList implements sort.Interface, ordering events by sequence
+// number.
+type eventSeqList []*Event
+
+func (l eventSeqList) Len() int { return len(l) }
+
+func (l eventSeqList) Less(i, j int) bool {
+	a, b := l[i], l[j]
+	return a.seq < b.seq
+}
+
+func (l eventSeqList) Swap(i, j int) { l[j], l[i] = l[i], l[j] }
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
new file mode 100644
index 0000000..254f201
--- /dev/null
+++ b/src/internal/trace/parser.go
@@ -0,0 +1,1120 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"math/rand"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	_ "unsafe"
+)
+
+// goCmd returns the go command to execute: the binary under
+// runtime.GOROOT()/bin (with an ".exe" suffix on Windows) when os.Stat finds
+// it, and the bare name "go" otherwise.
+func goCmd() string {
+	name := "go"
+	if runtime.GOOS == "windows" {
+		name += ".exe"
+	}
+	candidate := filepath.Join(runtime.GOROOT(), "bin", name)
+	if _, err := os.Stat(candidate); err != nil {
+		return "go"
+	}
+	return candidate
+}
+
+// Event describes one event in the trace.
+//
+// Up to three event-type-specific numeric arguments are kept in Args. When the
+// event type records a stack, the trailing stack ID argument is stored in
+// StkID rather than in Args.
+type Event struct {
+	Off   int       // offset in input file (for debugging and error reporting)
+	Type  byte      // one of Ev*
+	seq   int64     // sequence number
+	Ts    int64     // timestamp in nanoseconds
+	P     int       // P on which the event happened (can be one of TimerP, NetpollP, SyscallP)
+	G     uint64    // G on which the event happened
+	StkID uint64    // unique stack ID
+	Stk   []*Frame  // stack trace (can be empty)
+	Args  [3]uint64 // event-type-specific arguments
+	SArgs []string  // event-type-specific string args
+	// linked event (can be nil), depends on event type:
+	// for GCStart: the GCStop
+	// for GCSTWStart: the GCSTWDone
+	// for GCSweepStart: the GCSweepDone
+	// for GoCreate: first GoStart of the created goroutine
+	// for GoStart/GoStartLabel: the associated GoEnd, GoBlock or other blocking event
+	// for GoSched/GoPreempt: the next GoStart
+	// for GoBlock and other blocking events: the unblock event
+	// for GoUnblock: the associated GoStart
+	// for blocking GoSysCall: the associated GoSysExit
+	// for GoSysExit: the next GoStart
+	// for GCMarkAssistStart: the associated GCMarkAssistDone
+	// for UserTaskCreate: the UserTaskEnd
+	// for UserRegion: if the start region, the corresponding UserRegion end event
+	Link *Event
+}
+
+// Frame is a frame in stack traces.
+//
+// For traces older than version 1007 only PC is known after parsing; Fn, File
+// and Line are filled in later by symbolize. For newer traces all four fields
+// come from the EvStack record and the string dictionary.
+type Frame struct {
+	PC   uint64
+	Fn   string
+	File string
+	Line int
+}
+
+// Identifiers for synthetic Ps. FakeP is 1000000 and the constants below it
+// take the following consecutive values via iota.
+const (
+	// Special P identifiers:
+	FakeP    = 1000000 + iota
+	TimerP   // depicts timer unblocks
+	NetpollP // depicts network unblocks
+	SyscallP // depicts returns from syscalls
+	GCP      // depicts GC state
+)
+
+// ParseResult is the result of Parse.
+type ParseResult struct {
+	// Events is the sorted list of Events in the trace.
+	Events []*Event
+	// Stacks is the stack traces keyed by stack IDs from the trace.
+	// An event with a non-zero StkID has its Stk set to Stacks[StkID].
+	Stacks map[uint64][]*Frame
+}
+
+// Parse parses, post-processes and verifies the trace read from r.
+//
+// bin is the path of the binary that produced the trace. It is required for
+// traces older than version 1007 (go 1.6 or below), for which Parse returns an
+// error when bin is empty.
+func Parse(r io.Reader, bin string) (ParseResult, error) {
+	ver, res, err := parse(r, bin)
+	switch {
+	case err != nil:
+		return ParseResult{}, err
+	case ver < 1007 && bin == "":
+		return ParseResult{}, fmt.Errorf("for traces produced by go 1.6 or below, the binary argument must be provided")
+	}
+	return res, nil
+}
+
+// parse parses, post-processes and verifies the trace. It returns the
+// trace version and the parse result.
+//
+// The steps are: wire-format decoding, per-event parsing and ordering, removal
+// of futile wakeups, inter-event verification, attaching stacks to events, and,
+// for traces older than version 1007 with a non-empty bin, symbolization.
+func parse(r io.Reader, bin string) (int, ParseResult, error) {
+	var none ParseResult
+
+	ver, rawEvents, strtab, err := readTrace(r)
+	if err != nil {
+		return 0, none, err
+	}
+	events, stacks, err := parseEvents(ver, rawEvents, strtab)
+	if err != nil {
+		return 0, none, err
+	}
+	events = removeFutile(events)
+	if err := postProcessTrace(ver, events); err != nil {
+		return 0, none, err
+	}
+
+	// Attach stack traces.
+	for _, ev := range events {
+		if ev.StkID == 0 {
+			continue
+		}
+		ev.Stk = stacks[ev.StkID]
+	}
+
+	needSymbols := ver < 1007 && bin != ""
+	if needSymbols {
+		if err := symbolize(events, bin); err != nil {
+			return 0, none, err
+		}
+	}
+	return ver, ParseResult{Events: events, Stacks: stacks}, nil
+}
+
+// rawEvent is a helper type used during parsing.
+// It holds one record exactly as decoded by readTrace, before any
+// event-type-specific interpretation.
+type rawEvent struct {
+	// off is the offset in the input at which the record starts.
+	off   int
+	// typ is the event type (one of Ev*).
+	typ   byte
+	// args are the decoded base-128 values, in wire order.
+	args  []uint64
+	// sargs holds inline string arguments (the value string of EvUserLog).
+	sargs []string
+}
+
+// readTrace does wire-format parsing and verification.
+// It does not care about specific event types and argument meaning.
+//
+// It validates the 16-byte header and then decodes records until EOF. EvString
+// records populate the returned string dictionary instead of being returned as
+// events. On failure err is non-nil and the other results must not be relied on.
+func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]string, err error) {
+	// Read and validate trace header.
+	var buf [16]byte
+	off, err := io.ReadFull(r, buf[:])
+	if err != nil {
+		err = fmt.Errorf("failed to read header: read %v, err %v", off, err)
+		return
+	}
+	ver, err = parseHeader(buf[:])
+	if err != nil {
+		return
+	}
+	switch ver {
+	case 1005, 1007, 1008, 1009, 1010, 1011:
+		// Note: When adding a new version, add canned traces
+		// from the old version to the test suite using mkcanned.bash.
+		break
+	default:
+		err = fmt.Errorf("unsupported trace file version %v.%v (update Go toolchain) %v", ver/1000, ver%1000, ver)
+		return
+	}
+
+	// Read events.
+	strings = make(map[uint64]string)
+	for {
+		// Read event type and number of arguments (1 byte).
+		off0 := off
+		var n int
+		n, err = r.Read(buf[:1])
+		if err == io.EOF {
+			err = nil
+			break
+		}
+		if err != nil || n != 1 {
+			err = fmt.Errorf("failed to read trace at offset 0x%x: n=%v err=%v", off0, n, err)
+			return
+		}
+		off += n
+		// The low 6 bits are the event type, the high 2 bits encode the
+		// argument count minus one.
+		typ := buf[0] << 2 >> 2
+		narg := buf[0]>>6 + 1
+		inlineArgs := byte(4)
+		if ver < 1007 {
+			narg++
+			inlineArgs++
+		}
+		if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > ver {
+			err = fmt.Errorf("unknown event type %v at offset 0x%x", typ, off0)
+			return
+		}
+		if typ == EvString {
+			// String dictionary entry [ID, length, string].
+			var id uint64
+			id, off, err = readVal(r, off)
+			if err != nil {
+				return
+			}
+			if id == 0 {
+				err = fmt.Errorf("string at offset %d has invalid id 0", off)
+				return
+			}
+			if strings[id] != "" {
+				err = fmt.Errorf("string at offset %d has duplicate id %v", off, id)
+				return
+			}
+			var ln uint64
+			ln, off, err = readVal(r, off)
+			if err != nil {
+				return
+			}
+			if ln == 0 {
+				err = fmt.Errorf("string at offset %d has invalid length 0", off)
+				return
+			}
+			if ln > 1e6 {
+				err = fmt.Errorf("string at offset %d has too large length %v", off, ln)
+				return
+			}
+			buf := make([]byte, ln)
+			var n int
+			n, err = io.ReadFull(r, buf)
+			if err != nil {
+				err = fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, ln, err)
+				return
+			}
+			off += n
+			strings[id] = string(buf)
+			continue
+		}
+		ev := rawEvent{typ: typ, off: off0}
+		if narg < inlineArgs {
+			// The argument count is known from the first byte.
+			for i := 0; i < int(narg); i++ {
+				var v uint64
+				v, off, err = readVal(r, off)
+				if err != nil {
+					err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+					return
+				}
+				ev.args = append(ev.args, v)
+			}
+		} else {
+			// More than inlineArgs args, the first value is length of the event in bytes.
+			var v uint64
+			v, off, err = readVal(r, off)
+			if err != nil {
+				err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+				return
+			}
+			evLen := v
+			off1 := off
+			for evLen > uint64(off-off1) {
+				v, off, err = readVal(r, off)
+				if err != nil {
+					err = fmt.Errorf("failed to read event %v argument at offset %v (%v)", typ, off, err)
+					return
+				}
+				ev.args = append(ev.args, v)
+			}
+			if evLen != uint64(off-off1) {
+				err = fmt.Errorf("event has wrong length at offset 0x%x: want %v, got %v", off0, evLen, off-off1)
+				return
+			}
+		}
+		if ev.typ == EvUserLog {
+			// EvUserLog records are followed by a length-prefixed value string.
+			var s string
+			s, off, err = readStr(r, off)
+			if err != nil {
+				// A truncated or oversized value string leaves the stream
+				// position undefined, so the error must not be dropped.
+				return
+			}
+			ev.sargs = append(ev.sargs, s)
+		}
+		events = append(events, ev)
+	}
+	return
+}
+
+// readStr reads a length-prefixed string from r starting at offset off0 and
+// returns it together with the offset just past it. The length is a base-128
+// value; a length of zero yields the empty string and a length above 1e6 is
+// rejected as an error.
+func readStr(r io.Reader, off0 int) (s string, off int, err error) {
+	var sz uint64
+	sz, off, err = readVal(r, off0)
+	switch {
+	case err != nil, sz == 0:
+		return "", off, err
+	case sz > 1e6:
+		return "", off, fmt.Errorf("string at offset %d is too large (len=%d)", off, sz)
+	}
+	data := make([]byte, sz)
+	n, err := io.ReadFull(r, data)
+	end := off + n
+	if err != nil || uint64(n) != sz {
+		return "", end, fmt.Errorf("failed to read trace at offset %d: read %v, want %v, error %v", off, n, sz, err)
+	}
+	return string(data), end, nil
+}
+
+// parseHeader parses a 16-byte trace header of the form
+// "go 1.7 trace\x00\x00\x00\x00" and returns the version as major*1000+minor
+// (1007 for the example). The major version is a single digit 1-9; the minor
+// version is one digit 1-9 followed by up to two more digits. The remainder
+// must be " trace" padded with NUL bytes to the full header length.
+func parseHeader(buf []byte) (int, error) {
+	if len(buf) != 16 {
+		return 0, fmt.Errorf("bad header length")
+	}
+	nonZeroDigit := func(c byte) bool { return '1' <= c && c <= '9' }
+	if !bytes.HasPrefix(buf, []byte("go ")) || !nonZeroDigit(buf[3]) || buf[4] != '.' || !nonZeroDigit(buf[5]) {
+		return 0, fmt.Errorf("not a trace file")
+	}
+	major := int(buf[3] - '0')
+	minor := int(buf[5] - '0')
+	// Consume at most two further minor-version digits.
+	pos := 6
+	for pos < 8 && '0' <= buf[pos] && buf[pos] <= '9' {
+		minor = minor*10 + int(buf[pos]-'0')
+		pos++
+	}
+	const tail = " trace\x00\x00\x00\x00"
+	if !bytes.Equal(buf[pos:], []byte(tail[:16-pos])) {
+		return 0, fmt.Errorf("not a trace file")
+	}
+	return major*1000 + minor, nil
+}
+
+// parseEvents transforms raw events into events.
+// It does analyze and verify per-event-type arguments.
+//
+// EvBatch, EvFrequency, EvTimerGoroutine and EvStack records only update parser
+// state (current P, tick frequency, timer goroutine set, stack table). Every
+// other record becomes an *Event appended to the batch of the current P. The
+// batches are then merged by order1005 (ver < 1007) or order1007, and time
+// stamps are rebased to the first event and converted from ticks to
+// nanoseconds using the EvFrequency value.
+//
+// strings is the string dictionary produced by readTrace; it shadows the
+// strings package inside this function.
+func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (events []*Event, stacks map[uint64][]*Frame, err error) {
+	var ticksPerSec, lastSeq, lastTs int64
+	var lastG uint64
+	var lastP int
+	timerGoids := make(map[uint64]bool)
+	lastGs := make(map[int]uint64) // last goroutine running on P
+	stacks = make(map[uint64][]*Frame)
+	batches := make(map[int][]*Event) // events by P
+	for _, raw := range rawEvents {
+		desc := EventDescriptions[raw.typ]
+		if desc.Name == "" {
+			err = fmt.Errorf("missing description for event type %v", raw.typ)
+			return
+		}
+		narg := argNum(raw, ver)
+		if len(raw.args) != narg {
+			err = fmt.Errorf("%v has wrong number of arguments at offset 0x%x: want %v, got %v",
+				desc.Name, raw.off, narg, len(raw.args))
+			return
+		}
+		switch raw.typ {
+		case EvBatch:
+			// Switch to another P: remember the goroutine current on the old
+			// P and restore the one last seen on the new P. The batch also
+			// carries the base time stamp (and, before 1.7, the base
+			// sequence number) for the deltas that follow.
+			lastGs[lastP] = lastG
+			lastP = int(raw.args[0])
+			lastG = lastGs[lastP]
+			if ver < 1007 {
+				lastSeq = int64(raw.args[1])
+				lastTs = int64(raw.args[2])
+			} else {
+				lastTs = int64(raw.args[1])
+			}
+		case EvFrequency:
+			ticksPerSec = int64(raw.args[0])
+			if ticksPerSec <= 0 {
+				// The most likely cause for this is tick skew on different CPUs.
+				// For example, solaris/amd64 seems to have wildly different
+				// ticks on different CPUs.
+				err = ErrTimeOrder
+				return
+			}
+		case EvTimerGoroutine:
+			timerGoids[raw.args[0]] = true
+		case EvStack:
+			if len(raw.args) < 2 {
+				err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want at least 2, got %v",
+					raw.off, len(raw.args))
+				return
+			}
+			size := raw.args[1]
+			if size > 1000 {
+				err = fmt.Errorf("EvStack has bad number of frames at offset 0x%x: %v",
+					raw.off, size)
+				return
+			}
+			// From 1.7 on each frame is {PC, func string ID, file string ID,
+			// line}; before that it is a bare PC.
+			want := 2 + 4*size
+			if ver < 1007 {
+				want = 2 + size
+			}
+			if uint64(len(raw.args)) != want {
+				err = fmt.Errorf("EvStack has wrong number of arguments at offset 0x%x: want %v, got %v",
+					raw.off, want, len(raw.args))
+				return
+			}
+			id := raw.args[0]
+			if id != 0 && size > 0 {
+				stk := make([]*Frame, size)
+				for i := 0; i < int(size); i++ {
+					if ver < 1007 {
+						stk[i] = &Frame{PC: raw.args[2+i]}
+					} else {
+						pc := raw.args[2+i*4+0]
+						fn := raw.args[2+i*4+1]
+						file := raw.args[2+i*4+2]
+						line := raw.args[2+i*4+3]
+						stk[i] = &Frame{PC: pc, Fn: strings[fn], File: strings[file], Line: int(line)}
+					}
+				}
+				stacks[id] = stk
+			}
+		default:
+			e := &Event{Off: raw.off, Type: raw.typ, P: lastP, G: lastG}
+			// Sequence numbers (before 1.7) and time stamps are encoded as
+			// deltas from the previous event in the batch.
+			var argOffset int
+			if ver < 1007 {
+				e.seq = lastSeq + int64(raw.args[0])
+				e.Ts = lastTs + int64(raw.args[1])
+				lastSeq = e.seq
+				argOffset = 2
+			} else {
+				e.Ts = lastTs + int64(raw.args[0])
+				argOffset = 1
+			}
+			lastTs = e.Ts
+			// Copy the remaining arguments; a trailing stack ID goes to StkID.
+			for i := argOffset; i < narg; i++ {
+				if i == narg-1 && desc.Stack {
+					e.StkID = raw.args[i]
+				} else {
+					e.Args[i-argOffset] = raw.args[i]
+				}
+			}
+			switch raw.typ {
+			case EvGoStart, EvGoStartLocal, EvGoStartLabel:
+				lastG = e.Args[0]
+				e.G = lastG
+				if raw.typ == EvGoStartLabel {
+					e.SArgs = []string{strings[e.Args[2]]}
+				}
+			case EvGCSTWStart:
+				e.G = 0
+				switch e.Args[0] {
+				case 0:
+					e.SArgs = []string{"mark termination"}
+				case 1:
+					e.SArgs = []string{"sweep termination"}
+				default:
+					err = fmt.Errorf("unknown STW kind %d", e.Args[0])
+					return
+				}
+			case EvGCStart, EvGCDone, EvGCSTWDone:
+				e.G = 0
+			case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
+				EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+				EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
+				EvGoSysBlock, EvGoBlockGC:
+				// The goroutine stops running on this P; later events on the
+				// P are attributed to g 0 until the next start event.
+				lastG = 0
+			case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
+				e.G = e.Args[0]
+			case EvUserTaskCreate:
+				// e.Args 0: taskID, 1:parentID, 2:nameID
+				e.SArgs = []string{strings[e.Args[2]]}
+			case EvUserRegion:
+				// e.Args 0: taskID, 1: mode, 2:nameID
+				e.SArgs = []string{strings[e.Args[2]]}
+			case EvUserLog:
+				// e.Args 0: taskID, 1:keyID, 2: stackID
+				e.SArgs = []string{strings[e.Args[1]], raw.sargs[0]}
+			}
+			batches[lastP] = append(batches[lastP], e)
+		}
+	}
+	if len(batches) == 0 {
+		err = fmt.Errorf("trace is empty")
+		return
+	}
+	if ticksPerSec == 0 {
+		err = fmt.Errorf("no EvFrequency event")
+		return
+	}
+	if BreakTimestampsForTesting {
+		// Perturb the time stamps of five randomly chosen events.
+		var batchArr [][]*Event
+		for _, batch := range batches {
+			batchArr = append(batchArr, batch)
+		}
+		for i := 0; i < 5; i++ {
+			batch := batchArr[rand.Intn(len(batchArr))]
+			batch[rand.Intn(len(batch))].Ts += int64(rand.Intn(2000) - 1000)
+		}
+	}
+	if ver < 1007 {
+		events, err = order1005(batches)
+	} else {
+		events, err = order1007(batches)
+	}
+	if err != nil {
+		return
+	}
+
+	// Translate cpu ticks to real time.
+	minTs := events[0].Ts
+	// Use floating point to avoid integer overflows.
+	freq := 1e9 / float64(ticksPerSec)
+	for _, ev := range events {
+		ev.Ts = int64(float64(ev.Ts-minTs) * freq)
+		// Move timers and syscalls to separate fake Ps.
+		if timerGoids[ev.G] && ev.Type == EvGoUnblock {
+			ev.P = TimerP
+		}
+		if ev.Type == EvGoSysExit {
+			ev.P = SyscallP
+		}
+	}
+
+	return
+}
+
+// removeFutile removes all constituents of futile wakeups (block, unblock, start).
+// For example, a goroutine was unblocked on a mutex, but another goroutine got
+// ahead and acquired the mutex before the first goroutine is scheduled,
+// so the first goroutine has to block again. Such wakeups happen on buffered
+// channels and sync.Mutex, but are generally not interesting for end user.
+//
+// The filtering is done in place: the returned slice reuses the backing array
+// of events.
+func removeFutile(events []*Event) []*Event {
+	// Two non-trivial aspects:
+	// 1. A goroutine can be preempted during a futile wakeup and migrate to another P.
+	//	We want to remove all of that.
+	// 2. Tracing can start in the middle of a futile wakeup.
+	//	That is, we can see a futile wakeup event w/o the actual wakeup before it.
+	// postProcessTrace runs after us and ensures that we leave the trace in a consistent state.
+
+	// wakeState tracks, per goroutine, the events seen since its last
+	// unblock and whether an EvFutileWakeup was among them.
+	type wakeState struct {
+		futile bool
+		wakeup []*Event // wakeup sequence (subject for removal)
+	}
+
+	// Phase 1: determine futile wakeup sequences.
+	pending := make(map[uint64]wakeState)
+	drop := make(map[*Event]bool)
+	for _, ev := range events {
+		switch ev.Type {
+		case EvGoUnblock:
+			// A new unblock restarts the candidate sequence of the
+			// goroutine being woken.
+			id := ev.Args[0]
+			st := pending[id]
+			st.wakeup = []*Event{ev}
+			pending[id] = st
+		case EvGoStart, EvGoPreempt, EvFutileWakeup:
+			st := pending[ev.G]
+			st.wakeup = append(st.wakeup, ev)
+			st.futile = st.futile || ev.Type == EvFutileWakeup
+			pending[ev.G] = st
+		case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond:
+			// Blocking again closes the sequence; it is dropped as a whole
+			// only when it was marked futile.
+			if st := pending[ev.G]; st.futile {
+				drop[ev] = true
+				for _, w := range st.wakeup {
+					drop[w] = true
+				}
+			}
+			delete(pending, ev.G)
+		}
+	}
+
+	// Phase 2: remove futile wakeup sequences.
+	kept := events[:0] // overwrite the original slice
+	for _, ev := range events {
+		if drop[ev] {
+			continue
+		}
+		kept = append(kept, ev)
+	}
+	return kept
+}
+
+// ErrTimeOrder is returned by Parse when the trace contains
+// time stamps that do not respect actual event ordering.
+// It is also returned when the trace reports a non-positive tick frequency.
+var ErrTimeOrder = fmt.Errorf("time stamps out of order")
+
+// postProcessTrace does inter-event verification and information restoration.
+// The resulting trace is guaranteed to be consistent
+// (for example, a P does not run two Gs at the same time, or a G is indeed
+// blocked before an unblock event).
+//
+// It walks events in order, tracking the state of every goroutine and P, and
+// returns an error describing the first inconsistency found. As it goes it
+// fills in Event.Link between related events (see the Link field of Event) and
+// reassigns some events to the fake Ps GCP and NetpollP.
+func postProcessTrace(ver int, events []*Event) error {
+	const (
+		gDead = iota
+		gRunnable
+		gRunning
+		gWaiting
+	)
+	// gdesc is the tracked state of one goroutine:
+	//   ev           - most recent event still waiting to be linked to its follow-up
+	//   evStart      - start event of the current run, linked to the event that ends the run
+	//   evCreate     - GoCreate event, consumed by the goroutine's first start
+	//   evMarkAssist - mark assist start event not yet matched by a done event
+	type gdesc struct {
+		state        int
+		ev           *Event
+		evStart      *Event
+		evCreate     *Event
+		evMarkAssist *Event
+	}
+	// pdesc is the tracked state of one P: whether it is started, which
+	// goroutine it runs (0 for none), and its open STW (pre-1.10 traces only)
+	// and sweep events.
+	type pdesc struct {
+		running bool
+		g       uint64
+		evSTW   *Event
+		evSweep *Event
+	}
+
+	gs := make(map[uint64]gdesc)
+	ps := make(map[int]pdesc)
+	tasks := make(map[uint64]*Event)           // task id to task creation events
+	activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
+	// g 0 is treated as always running.
+	gs[0] = gdesc{state: gRunning}
+	var evGC, evSTW *Event
+
+	// checkRunning verifies that ev.G is running on ev.P; allowG0 permits the
+	// event to be attributed to g 0.
+	checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error {
+		name := EventDescriptions[ev.Type].Name
+		if g.state != gRunning {
+			return fmt.Errorf("g %v is not running while %v (offset %v, time %v)", ev.G, name, ev.Off, ev.Ts)
+		}
+		if p.g != ev.G {
+			return fmt.Errorf("p %v is not running g %v while %v (offset %v, time %v)", ev.P, ev.G, name, ev.Off, ev.Ts)
+		}
+		if !allowG0 && ev.G == 0 {
+			return fmt.Errorf("g 0 did %v (offset %v, time %v)", EventDescriptions[ev.Type].Name, ev.Off, ev.Ts)
+		}
+		return nil
+	}
+
+	for _, ev := range events {
+		// g and p are copies; they are written back to the maps at the end
+		// of each iteration.
+		g := gs[ev.G]
+		p := ps[ev.P]
+
+		switch ev.Type {
+		case EvProcStart:
+			if p.running {
+				return fmt.Errorf("p %v is running before start (offset %v, time %v)", ev.P, ev.Off, ev.Ts)
+			}
+			p.running = true
+		case EvProcStop:
+			if !p.running {
+				return fmt.Errorf("p %v is not running before stop (offset %v, time %v)", ev.P, ev.Off, ev.Ts)
+			}
+			if p.g != 0 {
+				return fmt.Errorf("p %v is running a goroutine %v during stop (offset %v, time %v)", ev.P, p.g, ev.Off, ev.Ts)
+			}
+			p.running = false
+		case EvGCStart:
+			if evGC != nil {
+				return fmt.Errorf("previous GC is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			evGC = ev
+			// Attribute this to the global GC state.
+			ev.P = GCP
+		case EvGCDone:
+			if evGC == nil {
+				return fmt.Errorf("bogus GC end (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			evGC.Link = ev
+			evGC = nil
+		case EvGCSTWStart:
+			evp := &evSTW
+			if ver < 1010 {
+				// Before 1.10, EvGCSTWStart was per-P.
+				evp = &p.evSTW
+			}
+			if *evp != nil {
+				return fmt.Errorf("previous STW is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			*evp = ev
+		case EvGCSTWDone:
+			evp := &evSTW
+			if ver < 1010 {
+				// Before 1.10, EvGCSTWDone was per-P.
+				evp = &p.evSTW
+			}
+			if *evp == nil {
+				return fmt.Errorf("bogus STW end (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			(*evp).Link = ev
+			*evp = nil
+		case EvGCSweepStart:
+			if p.evSweep != nil {
+				return fmt.Errorf("previous sweeping is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			p.evSweep = ev
+		case EvGCMarkAssistStart:
+			if g.evMarkAssist != nil {
+				return fmt.Errorf("previous mark assist is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			g.evMarkAssist = ev
+		case EvGCMarkAssistDone:
+			// Unlike most events, mark assists can be in progress when a
+			// goroutine starts tracing, so we can't report an error here.
+			if g.evMarkAssist != nil {
+				g.evMarkAssist.Link = ev
+				g.evMarkAssist = nil
+			}
+		case EvGCSweepDone:
+			if p.evSweep == nil {
+				return fmt.Errorf("bogus sweeping end (offset %v, time %v)", ev.Off, ev.Ts)
+			}
+			p.evSweep.Link = ev
+			p.evSweep = nil
+		case EvGoWaiting:
+			if g.state != gRunnable {
+				return fmt.Errorf("g %v is not runnable before EvGoWaiting (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+			}
+			g.state = gWaiting
+			g.ev = ev
+		case EvGoInSyscall:
+			if g.state != gRunnable {
+				return fmt.Errorf("g %v is not runnable before EvGoInSyscall (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+			}
+			g.state = gWaiting
+			g.ev = ev
+		case EvGoCreate:
+			if err := checkRunning(p, g, ev, true); err != nil {
+				return err
+			}
+			if _, ok := gs[ev.Args[0]]; ok {
+				return fmt.Errorf("g %v already exists (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+			}
+			gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}
+		case EvGoStart, EvGoStartLabel:
+			if g.state != gRunnable {
+				return fmt.Errorf("g %v is not runnable before start (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+			}
+			if p.g != 0 {
+				return fmt.Errorf("p %v is already running g %v while start g %v (offset %v, time %v)", ev.P, p.g, ev.G, ev.Off, ev.Ts)
+			}
+			g.state = gRunning
+			g.evStart = ev
+			p.g = ev.G
+			// On the first start, take the start stack from Args[1] of the
+			// GoCreate event.
+			if g.evCreate != nil {
+				if ver < 1007 {
+					// +1 because symbolizer expects return pc.
+					ev.Stk = []*Frame{{PC: g.evCreate.Args[1] + 1}}
+				} else {
+					ev.StkID = g.evCreate.Args[1]
+				}
+				g.evCreate = nil
+			}
+
+			if g.ev != nil {
+				g.ev.Link = ev
+				g.ev = nil
+			}
+		case EvGoEnd, EvGoStop:
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.evStart.Link = ev
+			g.evStart = nil
+			g.state = gDead
+			p.g = 0
+
+			if ev.Type == EvGoEnd { // flush all active regions
+				regions := activeRegions[ev.G]
+				for _, s := range regions {
+					s.Link = ev
+				}
+				delete(activeRegions, ev.G)
+			}
+
+		case EvGoSched, EvGoPreempt:
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.state = gRunnable
+			g.evStart.Link = ev
+			g.evStart = nil
+			p.g = 0
+			g.ev = ev
+		case EvGoUnblock:
+			// g is the goroutine doing the unblocking; g1 (Args[0]) is the
+			// goroutine being unblocked.
+			if g.state != gRunning {
+				return fmt.Errorf("g %v is not running while unpark (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+			}
+			if ev.P != TimerP && p.g != ev.G {
+				return fmt.Errorf("p %v is not running g %v while unpark (offset %v, time %v)", ev.P, ev.G, ev.Off, ev.Ts)
+			}
+			g1 := gs[ev.Args[0]]
+			if g1.state != gWaiting {
+				return fmt.Errorf("g %v is not waiting before unpark (offset %v, time %v)", ev.Args[0], ev.Off, ev.Ts)
+			}
+			if g1.ev != nil && g1.ev.Type == EvGoBlockNet && ev.P != TimerP {
+				ev.P = NetpollP
+			}
+			if g1.ev != nil {
+				g1.ev.Link = ev
+			}
+			g1.state = gRunnable
+			g1.ev = ev
+			gs[ev.Args[0]] = g1
+		case EvGoSysCall:
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.ev = ev
+		case EvGoSysBlock:
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.state = gWaiting
+			g.evStart.Link = ev
+			g.evStart = nil
+			p.g = 0
+		case EvGoSysExit:
+			if g.state != gWaiting {
+				return fmt.Errorf("g %v is not waiting during syscall exit (offset %v, time %v)", ev.G, ev.Off, ev.Ts)
+			}
+			if g.ev != nil && g.ev.Type == EvGoSysCall {
+				g.ev.Link = ev
+			}
+			g.state = gRunnable
+			g.ev = ev
+		case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.state = gWaiting
+			g.ev = ev
+			g.evStart.Link = ev
+			g.evStart = nil
+			p.g = 0
+		case EvUserTaskCreate:
+			taskid := ev.Args[0]
+			if prevEv, ok := tasks[taskid]; ok {
+				return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv)
+			}
+			tasks[ev.Args[0]] = ev
+		case EvUserTaskEnd:
+			// An end without a known create is tolerated.
+			taskid := ev.Args[0]
+			if taskCreateEv, ok := tasks[taskid]; ok {
+				taskCreateEv.Link = ev
+				delete(tasks, taskid)
+			}
+		case EvUserRegion:
+			mode := ev.Args[1]
+			regions := activeRegions[ev.G]
+			if mode == 0 { // region start
+				activeRegions[ev.G] = append(regions, ev) // push
+			} else if mode == 1 { // region end
+				n := len(regions)
+				if n > 0 { // matching region start event is in the trace.
+					s := regions[n-1]
+					if s.Args[0] != ev.Args[0] || s.SArgs[0] != ev.SArgs[0] { // task id, region name mismatch
+						return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
+					}
+					// Link region start event with span end event
+					s.Link = ev
+
+					if n > 1 {
+						activeRegions[ev.G] = regions[:n-1]
+					} else {
+						delete(activeRegions, ev.G)
+					}
+				}
+			} else {
+				return fmt.Errorf("invalid user region mode: %q", ev)
+			}
+		}
+
+		gs[ev.G] = g
+		ps[ev.P] = p
+	}
+
+	// TODO(dvyukov): restore stacks for EvGoStart events.
+	// TODO(dvyukov): test that all EvGoStart events has non-nil Link.
+
+	return nil
+}
+
+// symbolize attaches func/file/line info to stack traces.
+//
+// It runs "go tool addr2line bin" (using the go command chosen by goCmd),
+// writes every distinct PC found in the events' stacks to its stdin, reads
+// back two lines per PC (function, then file:line), and replaces each frame in
+// ev.Stk with the symbolized frame for its PC.
+//
+// NOTE(review): all PCs are written before any output is read, the error
+// returns after cmd.Start do not wait for the child process, and the result of
+// cmd.Wait is discarded — confirm this is acceptable for the expected inputs.
+func symbolize(events []*Event, bin string) error {
+	// First, collect and dedup all pcs.
+	pcs := make(map[uint64]*Frame)
+	for _, ev := range events {
+		for _, f := range ev.Stk {
+			pcs[f.PC] = nil
+		}
+	}
+
+	// Start addr2line.
+	cmd := exec.Command(goCmd(), "tool", "addr2line", bin)
+	in, err := cmd.StdinPipe()
+	if err != nil {
+		return fmt.Errorf("failed to pipe addr2line stdin: %v", err)
+	}
+	cmd.Stderr = os.Stderr
+	out, err := cmd.StdoutPipe()
+	if err != nil {
+		return fmt.Errorf("failed to pipe addr2line stdout: %v", err)
+	}
+	err = cmd.Start()
+	if err != nil {
+		return fmt.Errorf("failed to start addr2line: %v", err)
+	}
+	outb := bufio.NewReader(out)
+
+	// Write all pcs to addr2line.
+	// Need to copy pcs to an array, because map iteration order is non-deterministic.
+	var pcArray []uint64
+	for pc := range pcs {
+		pcArray = append(pcArray, pc)
+		// The address sent is pc-1; presumably the recorded PCs are return
+		// addresses and this maps them back into the call instruction.
+		_, err := fmt.Fprintf(in, "0x%x\n", pc-1)
+		if err != nil {
+			return fmt.Errorf("failed to write to addr2line: %v", err)
+		}
+	}
+	in.Close()
+
+	// Read in answers.
+	for _, pc := range pcArray {
+		fn, err := outb.ReadString('\n')
+		if err != nil {
+			return fmt.Errorf("failed to read from addr2line: %v", err)
+		}
+		file, err := outb.ReadString('\n')
+		if err != nil {
+			return fmt.Errorf("failed to read from addr2line: %v", err)
+		}
+		f := &Frame{PC: pc}
+		// Strip the trailing newline that ReadString includes.
+		f.Fn = fn[:len(fn)-1]
+		f.File = file[:len(file)-1]
+		// Split "file:line" at the last colon when the suffix is numeric.
+		if colon := strings.LastIndex(f.File, ":"); colon != -1 {
+			ln, err := strconv.Atoi(f.File[colon+1:])
+			if err == nil {
+				f.File = f.File[:colon]
+				f.Line = ln
+			}
+		}
+		pcs[pc] = f
+	}
+	cmd.Wait()
+
+	// Replace frames in events array.
+	for _, ev := range events {
+		for i, f := range ev.Stk {
+			ev.Stk[i] = pcs[f.PC]
+		}
+	}
+
+	return nil
+}
+
+// readVal reads an unsigned base-128 value from r, starting at offset off0.
+//
+// Each byte contributes its low 7 bits, least significant group first; a clear
+// high bit marks the last byte. On success it returns the value and the offset
+// just past the last byte read. On failure it returns a zero value, a zero
+// offset and an error; a value that has not ended after 10 bytes is rejected.
+//
+// Bytes are read with io.ReadFull so that readers which legally return
+// (1, io.EOF) for the final byte, or (0, nil) on a short read, are handled
+// correctly instead of being reported as read failures.
+func readVal(r io.Reader, off0 int) (v uint64, off int, err error) {
+	off = off0
+	var buf [1]byte
+	for i := 0; i < 10; i++ {
+		n, rerr := io.ReadFull(r, buf[:])
+		if rerr != nil {
+			return 0, 0, fmt.Errorf("failed to read trace at offset %d: read %v, error %v", off0, n, rerr)
+		}
+		off++
+		v |= uint64(buf[0]&0x7f) << (uint(i) * 7)
+		if buf[0]&0x80 == 0 {
+			return v, off, nil
+		}
+	}
+	return 0, 0, fmt.Errorf("bad value at offset 0x%x", off0)
+}
+
+// Print dumps events to stdout, one per line in slice order. For debugging.
+func Print(events []*Event) {
+	for i := range events {
+		PrintEvent(events[i])
+	}
+}
+
+// PrintEvent writes the event, formatted with %s, followed by a newline to
+// stdout. For debugging.
+func PrintEvent(ev *Event) {
+	fmt.Fprintf(os.Stdout, "%s\n", ev)
+}
+
+// String formats the event as its time stamp, type name, P, G and offset,
+// followed by one name=value pair for each numeric and each string argument
+// listed in the event type's description.
+func (ev *Event) String() string {
+	desc := &EventDescriptions[ev.Type]
+	var out bytes.Buffer
+	fmt.Fprintf(&out, "%v %v p=%v g=%v off=%v", ev.Ts, desc.Name, ev.P, ev.G, ev.Off)
+	for i := range desc.Args {
+		fmt.Fprintf(&out, " %v=%v", desc.Args[i], ev.Args[i])
+	}
+	for i := range desc.SArgs {
+		fmt.Fprintf(&out, " %v=%v", desc.SArgs[i], ev.SArgs[i])
+	}
+	return out.String()
+}
+
+// argNum returns total number of args for the event accounting for timestamps,
+// sequence numbers and differences between trace format versions.
+// For EvStack, whose length is variable, it returns the number of arguments
+// actually present.
+func argNum(raw rawEvent, ver int) int {
+	if raw.typ == EvStack {
+		return len(raw.args)
+	}
+	desc := EventDescriptions[raw.typ]
+	pre17 := ver < 1007
+
+	n := len(desc.Args)
+	if desc.Stack {
+		n++
+	}
+
+	// These records carry no timestamp or sequence number.
+	switch raw.typ {
+	case EvBatch, EvFrequency, EvTimerGoroutine:
+		if pre17 {
+			n++ // there was an unused arg before 1.7
+		}
+		return n
+	}
+
+	n++ // timestamp
+	if pre17 {
+		n++ // sequence
+	}
+
+	// Arguments that were added to an event type in later versions.
+	switch {
+	case raw.typ == EvGCSweepDone && ver < 1009:
+		n -= 2 // 1.9 added two arguments
+	case (raw.typ == EvGCStart || raw.typ == EvGoStart || raw.typ == EvGoUnblock) && pre17:
+		n-- // 1.7 added an additional seq arg
+	case raw.typ == EvGCSTWStart && ver < 1010:
+		n-- // 1.10 added an argument
+	}
+	return n
+}
+
+// BreakTimestampsForTesting causes the parser to randomly alter timestamps (for testing of broken cputicks).
+// When set, parseEvents adds a random offset in [-1000, 999] ticks to the time
+// stamp of five randomly chosen events before ordering them.
+var BreakTimestampsForTesting bool
+
+// Event types in the trace.
+// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
+// The bracketed lists give each event's arguments in wire order.
+// EvCount is one past the last event type and is the length of EventDescriptions.
+const (
+	EvNone              = 0  // unused
+	EvBatch             = 1  // start of per-P batch of events [pid, timestamp]
+	EvFrequency         = 2  // contains tracer timer frequency [frequency (ticks per second)]
+	EvStack             = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
+	EvGomaxprocs        = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
+	EvProcStart         = 5  // start of P [timestamp, thread id]
+	EvProcStop          = 6  // stop of P [timestamp]
+	EvGCStart           = 7  // GC start [timestamp, seq, stack id]
+	EvGCDone            = 8  // GC done [timestamp]
+	EvGCSTWStart        = 9  // GC STW start [timestamp, kind (0: mark termination, 1: sweep termination)]
+	EvGCSTWDone         = 10 // GC STW done [timestamp]
+	EvGCSweepStart      = 11 // GC sweep start [timestamp, stack id]
+	EvGCSweepDone       = 12 // GC sweep done [timestamp, swept, reclaimed]
+	EvGoCreate          = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
+	EvGoStart           = 14 // goroutine starts running [timestamp, goroutine id, seq]
+	EvGoEnd             = 15 // goroutine ends [timestamp]
+	EvGoStop            = 16 // goroutine stops (like in select{}) [timestamp, stack]
+	EvGoSched           = 17 // goroutine calls Gosched [timestamp, stack]
+	EvGoPreempt         = 18 // goroutine is preempted [timestamp, stack]
+	EvGoSleep           = 19 // goroutine calls Sleep [timestamp, stack]
+	EvGoBlock           = 20 // goroutine blocks [timestamp, stack]
+	EvGoUnblock         = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
+	EvGoBlockSend       = 22 // goroutine blocks on chan send [timestamp, stack]
+	EvGoBlockRecv       = 23 // goroutine blocks on chan recv [timestamp, stack]
+	EvGoBlockSelect     = 24 // goroutine blocks on select [timestamp, stack]
+	EvGoBlockSync       = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
+	EvGoBlockCond       = 26 // goroutine blocks on Cond [timestamp, stack]
+	EvGoBlockNet        = 27 // goroutine blocks on network [timestamp, stack]
+	EvGoSysCall         = 28 // syscall enter [timestamp, stack]
+	EvGoSysExit         = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
+	EvGoSysBlock        = 30 // syscall blocks [timestamp]
+	EvGoWaiting         = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
+	EvGoInSyscall       = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
+	EvHeapAlloc         = 33 // gcController.heapLive change [timestamp, heap live bytes]
+	EvHeapGoal          = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
+	EvTimerGoroutine    = 35 // denotes timer goroutine [timer goroutine id]
+	EvFutileWakeup      = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
+	EvString            = 37 // string dictionary entry [ID, length, string]
+	EvGoStartLocal      = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+	EvGoUnblockLocal    = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+	EvGoSysExitLocal    = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+	EvGoStartLabel      = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
+	EvGoBlockGC         = 42 // goroutine blocks on GC assist [timestamp, stack]
+	EvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack]
+	EvGCMarkAssistDone  = 44 // GC mark assist done [timestamp]
+	EvUserTaskCreate    = 45 // trace.NewContext [timestamp, internal task id, internal parent id, stack, name string]
+	EvUserTaskEnd       = 46 // end of task [timestamp, internal task id, stack]
+	EvUserRegion        = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string]
+	EvUserLog           = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
+	EvCount             = 49
+)
+
+var EventDescriptions = [EvCount]struct {
+	Name       string   // event name without the "Ev" prefix
+	minVersion int      // first trace version that has the event; 1005 appears to mean Go 1.5 (TODO confirm)
+	Stack      bool     // whether the event carries a stack argument
+	Args       []string // names of the event's numeric arguments
+	SArgs      []string // string arguments
+}{
+	EvNone:              {"None", 1005, false, []string{}, nil},
+	EvBatch:             {"Batch", 1005, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
+	EvFrequency:         {"Frequency", 1005, false, []string{"freq"}, nil},   // in 1.5 format it was {"freq", "unused"}
+	EvStack:             {"Stack", 1005, false, []string{"id", "siz"}, nil},
+	EvGomaxprocs:        {"Gomaxprocs", 1005, true, []string{"procs"}, nil},
+	EvProcStart:         {"ProcStart", 1005, false, []string{"thread"}, nil},
+	EvProcStop:          {"ProcStop", 1005, false, []string{}, nil},
+	EvGCStart:           {"GCStart", 1005, true, []string{"seq"}, nil}, // in 1.5 format it was {}
+	EvGCDone:            {"GCDone", 1005, false, []string{}, nil},
+	EvGCSTWStart:        {"GCSTWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
+	EvGCSTWDone:         {"GCSTWDone", 1005, false, []string{}, nil},
+	EvGCSweepStart:      {"GCSweepStart", 1005, true, []string{}, nil},
+	EvGCSweepDone:       {"GCSweepDone", 1005, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
+	EvGoCreate:          {"GoCreate", 1005, true, []string{"g", "stack"}, nil},
+	EvGoStart:           {"GoStart", 1005, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+	EvGoEnd:             {"GoEnd", 1005, false, []string{}, nil},
+	EvGoStop:            {"GoStop", 1005, true, []string{}, nil},
+	EvGoSched:           {"GoSched", 1005, true, []string{}, nil},
+	EvGoPreempt:         {"GoPreempt", 1005, true, []string{}, nil},
+	EvGoSleep:           {"GoSleep", 1005, true, []string{}, nil},
+	EvGoBlock:           {"GoBlock", 1005, true, []string{}, nil},
+	EvGoUnblock:         {"GoUnblock", 1005, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+	EvGoBlockSend:       {"GoBlockSend", 1005, true, []string{}, nil},
+	EvGoBlockRecv:       {"GoBlockRecv", 1005, true, []string{}, nil},
+	EvGoBlockSelect:     {"GoBlockSelect", 1005, true, []string{}, nil},
+	EvGoBlockSync:       {"GoBlockSync", 1005, true, []string{}, nil},
+	EvGoBlockCond:       {"GoBlockCond", 1005, true, []string{}, nil},
+	EvGoBlockNet:        {"GoBlockNet", 1005, true, []string{}, nil},
+	EvGoSysCall:         {"GoSysCall", 1005, true, []string{}, nil},
+	EvGoSysExit:         {"GoSysExit", 1005, false, []string{"g", "seq", "ts"}, nil},
+	EvGoSysBlock:        {"GoSysBlock", 1005, false, []string{}, nil},
+	EvGoWaiting:         {"GoWaiting", 1005, false, []string{"g"}, nil},
+	EvGoInSyscall:       {"GoInSyscall", 1005, false, []string{"g"}, nil},
+	EvHeapAlloc:         {"HeapAlloc", 1005, false, []string{"mem"}, nil},
+	EvHeapGoal:          {"HeapGoal", 1005, false, []string{"mem"}, nil},
+	EvTimerGoroutine:    {"TimerGoroutine", 1005, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
+	EvFutileWakeup:      {"FutileWakeup", 1005, false, []string{}, nil},
+	EvString:            {"String", 1007, false, []string{}, nil},
+	EvGoStartLocal:      {"GoStartLocal", 1007, false, []string{"g"}, nil},
+	EvGoUnblockLocal:    {"GoUnblockLocal", 1007, true, []string{"g"}, nil},
+	EvGoSysExitLocal:    {"GoSysExitLocal", 1007, false, []string{"g", "ts"}, nil},
+	EvGoStartLabel:      {"GoStartLabel", 1008, false, []string{"g", "seq", "labelid"}, []string{"label"}},
+	EvGoBlockGC:         {"GoBlockGC", 1008, true, []string{}, nil},
+	EvGCMarkAssistStart: {"GCMarkAssistStart", 1009, true, []string{}, nil},
+	EvGCMarkAssistDone:  {"GCMarkAssistDone", 1009, false, []string{}, nil},
+	EvUserTaskCreate:    {"UserTaskCreate", 1011, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
+	EvUserTaskEnd:       {"UserTaskEnd", 1011, true, []string{"taskid"}, nil},
+	EvUserRegion:        {"UserRegion", 1011, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
+	EvUserLog:           {"UserLog", 1011, true, []string{"id", "keyid"}, []string{"category", "message"}},
+}
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go
new file mode 100644
index 0000000..cdab95a
--- /dev/null
+++ b/src/internal/trace/parser_test.go
@@ -0,0 +1,110 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestCorruptedInputs(t *testing.T) {
+	// Regression inputs: each of these used to crash the parser.
+	corrupted := []string{
+		"gotrace\x00\x020",
+		"gotrace\x00Q00\x020",
+		"gotrace\x00T00\x020",
+		"gotrace\x00\xc3\x0200",
+		"go 1.5 trace\x00\x00\x00\x00\x020",
+		"go 1.5 trace\x00\x00\x00\x00Q00\x020",
+		"go 1.5 trace\x00\x00\x00\x00T00\x020",
+		"go 1.5 trace\x00\x00\x00\x00\xc3\x0200",
+	}
+	for _, in := range corrupted {
+		res, err := Parse(strings.NewReader(in), "")
+		if !(err != nil && res.Events == nil && res.Stacks == nil) {
+			t.Fatalf("no error on input: %q", in)
+		}
+	}
+}
+
+func TestParseCanned(t *testing.T) {
+	entries, err := os.ReadDir("./testdata")
+	if err != nil {
+		t.Fatalf("failed to read ./testdata: %v", err)
+	}
+	for _, entry := range entries {
+		fi, err := entry.Info()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if testing.Short() && fi.Size() > 10000 {
+			continue
+		}
+		base := entry.Name()
+		raw, err := os.ReadFile(filepath.Join("./testdata", base))
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Old traces need a real binary name for Parse to symbolize them, so call
+		// 'parse' directly: with an empty binary name it skips symbol lookup.
+		_, _, err = parse(bytes.NewReader(raw), "")
+		switch {
+		case strings.HasSuffix(base, "_good"):
+			if err != nil {
+				t.Errorf("failed to parse good trace %v: %v", base, err)
+			}
+		case strings.HasSuffix(base, "_unordered"):
+			if err != ErrTimeOrder {
+				t.Errorf("unordered trace is not detected %v: %v", base, err)
+			}
+		default:
+			t.Errorf("unknown input file suffix: %v", base)
+		}
+	}
+}
+
+// TestParseVersion checks the version number that parseHeader extracts from a trace header.
+func TestParseVersion(t *testing.T) {
+	// A wanted version of -1 marks a header that must be rejected.
+	cases := map[string]int{
+		"go 1.5 trace\x00\x00\x00\x00": 1005,
+		"go 1.7 trace\x00\x00\x00\x00": 1007,
+		"go 1.10 trace\x00\x00\x00":    1010,
+		"go 1.25 trace\x00\x00\x00":    1025,
+		"go 1.234 trace\x00\x00":       1234,
+		"go 1.2345 trace\x00":          -1,
+		"go 0.0 trace\x00\x00\x00\x00": -1,
+		"go a.b trace\x00\x00\x00\x00": -1,
+	}
+	for header, want := range cases {
+		got, err := parseHeader([]byte(header))
+		switch {
+		case want == -1 && err == nil:
+			t.Fatalf("no error on input: %q, version %v", header, got)
+		case want == -1:
+			// Rejected, as expected.
+		case err != nil:
+			t.Fatalf("failed to parse: %q (%v)", header, err)
+		case got != want:
+			t.Fatalf("wrong version: %v, want %v, input: %q", got, want, header)
+		}
+	}
+}
+
+func TestTimestampOverflow(t *testing.T) {
+	// Long tracing sessions produce very large timestamps; the parser must accept them.
+	tw := NewWriter()
+	tw.Emit(EvBatch, 0, 0)
+	tw.Emit(EvFrequency, 1e9)
+	for stamp := uint64(1); stamp < 1e16; stamp <<= 1 {
+		tw.Emit(EvGoCreate, stamp, stamp, 0, 0)
+	}
+	if _, err := Parse(tw, ""); err != nil {
+		t.Fatalf("failed to parse: %v", err)
+	}
+}
diff --git a/src/internal/trace/testdata/http_1_10_good b/src/internal/trace/testdata/http_1_10_good
new file mode 100644
index 0000000..a4f2ed8
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_10_good
diff --git a/src/internal/trace/testdata/http_1_11_good b/src/internal/trace/testdata/http_1_11_good
new file mode 100644
index 0000000..0efcc6f
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_11_good
diff --git a/src/internal/trace/testdata/http_1_5_good b/src/internal/trace/testdata/http_1_5_good
new file mode 100644
index 0000000..0736cae
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_5_good
diff --git a/src/internal/trace/testdata/http_1_7_good b/src/internal/trace/testdata/http_1_7_good
new file mode 100644
index 0000000..b0e318e
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_7_good
diff --git a/src/internal/trace/testdata/http_1_9_good b/src/internal/trace/testdata/http_1_9_good
new file mode 100644
index 0000000..ca89278
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_9_good
diff --git a/src/internal/trace/testdata/stress_1_10_good b/src/internal/trace/testdata/stress_1_10_good
new file mode 100644
index 0000000..19778b0
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_10_good
diff --git a/src/internal/trace/testdata/stress_1_11_good b/src/internal/trace/testdata/stress_1_11_good
new file mode 100644
index 0000000..6468d89
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_11_good
diff --git a/src/internal/trace/testdata/stress_1_5_good b/src/internal/trace/testdata/stress_1_5_good
new file mode 100644
index 0000000..c5055eb
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_5_good
diff --git a/src/internal/trace/testdata/stress_1_5_unordered b/src/internal/trace/testdata/stress_1_5_unordered
new file mode 100644
index 0000000..11f7d74
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_5_unordered
diff --git a/src/internal/trace/testdata/stress_1_7_good b/src/internal/trace/testdata/stress_1_7_good
new file mode 100644
index 0000000..b4d927d
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_7_good
diff --git a/src/internal/trace/testdata/stress_1_9_good b/src/internal/trace/testdata/stress_1_9_good
new file mode 100644
index 0000000..dcf17f1
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_9_good
diff --git a/src/internal/trace/testdata/stress_start_stop_1_10_good b/src/internal/trace/testdata/stress_start_stop_1_10_good
new file mode 100644
index 0000000..b908e10
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_10_good
diff --git a/src/internal/trace/testdata/stress_start_stop_1_11_good b/src/internal/trace/testdata/stress_start_stop_1_11_good
new file mode 100644
index 0000000..457f01a
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_11_good
diff --git a/src/internal/trace/testdata/stress_start_stop_1_5_good b/src/internal/trace/testdata/stress_start_stop_1_5_good
new file mode 100644
index 0000000..72a887b
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_5_good
diff --git a/src/internal/trace/testdata/stress_start_stop_1_7_good b/src/internal/trace/testdata/stress_start_stop_1_7_good
new file mode 100644
index 0000000..c23ed7d
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_7_good
diff --git a/src/internal/trace/testdata/stress_start_stop_1_9_good b/src/internal/trace/testdata/stress_start_stop_1_9_good
new file mode 100644
index 0000000..f00f190
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_9_good
diff --git a/src/internal/trace/testdata/user_task_span_1_11_good b/src/internal/trace/testdata/user_task_span_1_11_good
new file mode 100644
index 0000000..f4edb67
--- /dev/null
+++ b/src/internal/trace/testdata/user_task_span_1_11_good
diff --git a/src/internal/trace/writer.go b/src/internal/trace/writer.go
new file mode 100644
index 0000000..dd0b9f1
--- /dev/null
+++ b/src/internal/trace/writer.go
@@ -0,0 +1,49 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package trace
+
+import "bytes"
+
+// Writer is a test trace writer; the embedded bytes.Buffer holds the encoded trace.
+type Writer struct {
+	bytes.Buffer
+}
+
+func NewWriter() *Writer {
+	w := &Writer{}
+	w.WriteString("go 1.9 trace\x00\x00\x00\x00") // every trace starts with this header
+	return w
+}
+
+// Emit appends one event record of type typ, followed by args, to the trace.
+// See Event types for valid types and required arguments.
+func (w *Writer) Emit(typ byte, args ...uint64) {
+	narg := byte(len(args)) - 1
+	if narg > 3 {
+		narg = 3
+	}
+	long := narg == 3 // a saturated count means a payload length byte follows the header
+	rec := []byte{typ | narg<<6}
+	if long {
+		rec = append(rec, 0) // placeholder, patched once the payload is encoded
+	}
+	for _, arg := range args {
+		rec = appendVarint(rec, arg)
+	}
+	if long {
+		rec[1] = byte(len(rec) - 2)
+	}
+	if n, err := w.Write(rec); n != len(rec) || err != nil {
+		panic("failed to write")
+	}
+}
+
+func appendVarint(buf []byte, v uint64) []byte {
+	for v >= 0x80 {
+		buf = append(buf, byte(v)|0x80) // low 7 bits with the continuation bit set
+		v >>= 7
+	}
+	return append(buf, byte(v))
+}
diff --git a/src/internal/unsafeheader/unsafeheader.go b/src/internal/unsafeheader/unsafeheader.go
new file mode 100644
index 0000000..6d092c6
--- /dev/null
+++ b/src/internal/unsafeheader/unsafeheader.go
@@ -0,0 +1,37 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package unsafeheader contains header declarations for the Go runtime's slice
+// and string implementations.
+//
+// This package allows packages that cannot import "reflect" to use types that
+// are tested to be equivalent to reflect.SliceHeader and reflect.StringHeader.
+package unsafeheader
+
+import (
+	"unsafe"
+)
+
+// Slice is the runtime representation of a slice.
+// It cannot be used safely or portably and its representation may
+// change in a later release.
+//
+// Unlike reflect.SliceHeader, its Data field is sufficient to guarantee the
+// data it references will not be garbage collected.
+type Slice struct {
+	Data unsafe.Pointer // address of the slice's first element
+	Len  int            // number of elements, as reported by len
+	Cap  int            // capacity, as reported by cap
+}
+
+// String is the runtime representation of a string.
+// It cannot be used safely or portably and its representation may
+// change in a later release.
+//
+// Unlike reflect.StringHeader, its Data field is sufficient to guarantee the
+// data it references will not be garbage collected.
+type String struct {
+	Data unsafe.Pointer // address of the string's bytes
+	Len  int            // length, as reported by len
+}
diff --git a/src/internal/unsafeheader/unsafeheader_test.go b/src/internal/unsafeheader/unsafeheader_test.go
new file mode 100644
index 0000000..6fb7cca
--- /dev/null
+++ b/src/internal/unsafeheader/unsafeheader_test.go
@@ -0,0 +1,100 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unsafeheader_test
+
+import (
+	"bytes"
+	"internal/unsafeheader"
+	"reflect"
+	"testing"
+	"unsafe"
+)
+
+// TestTypeMatchesReflectType verifies that every unsafeheader type agrees in
+// field names, field types and layout with its counterpart in the reflect
+// package (reflect.SliceHeader and reflect.StringHeader).
+func TestTypeMatchesReflectType(t *testing.T) {
+	check := testHeaderMatchesReflect
+
+	// unsafeheader.Slice must mirror reflect.SliceHeader.
+	t.Run("Slice", func(t *testing.T) { check(t, unsafeheader.Slice{}, reflect.SliceHeader{}) })
+
+	// unsafeheader.String must mirror reflect.StringHeader.
+	t.Run("String", func(t *testing.T) { check(t, unsafeheader.String{}, reflect.StringHeader{}) })
+}
+
+// testHeaderMatchesReflect reports a test error for every mismatch between header and reflectHeader.
+func testHeaderMatchesReflect(t *testing.T, header, reflectHeader interface{}) {
+	ours, theirs := reflect.TypeOf(header), reflect.TypeOf(reflectHeader)
+	// Each of our fields needs a same-named twin with a compatible type at the same offset.
+	for idx, n := 0, ours.NumField(); idx < n; idx++ {
+		field := ours.Field(idx)
+		twin, found := theirs.FieldByName(field.Name)
+		if !found {
+			t.Errorf("Field %d of %v is named %s, but no such field exists in %v", idx, ours, field.Name, theirs)
+			continue
+		}
+		if !typeCompatible(field.Type, twin.Type) {
+			t.Errorf("%v.%s has type %v, but %v.%s has type %v", ours, field.Name, field.Type, theirs, twin.Name, twin.Type)
+		}
+		if field.Offset != twin.Offset {
+			t.Errorf("%v.%s has offset %d, but %v.%s has offset %d", ours, field.Name, field.Offset, theirs, twin.Name, twin.Offset)
+		}
+	}
+
+	if n, rn := ours.NumField(), theirs.NumField(); n != rn {
+		t.Errorf("%v has %d fields, but %v has %d", ours, n, theirs, rn)
+	}
+	if a, ra := ours.Align(), theirs.Align(); a != ra {
+		t.Errorf("%v has alignment %d, but %v has alignment %d", ours, a, theirs, ra)
+	}
+}
+
+var (
+	unsafePointerType = reflect.TypeOf(unsafe.Pointer(nil)) // reflect type of unsafe.Pointer
+	uintptrType       = reflect.TypeOf(uintptr(0))          // reflect type of uintptr
+)
+
+func typeCompatible(t, rt reflect.Type) bool {
+	return (rt == uintptrType && t == unsafePointerType) || rt == t // unsafe.Pointer may stand in for uintptr
+}
+
+// TestWriteThroughHeader ensures that the headers in the unsafeheader package
+// can successfully mutate variables of the corresponding built-in types.
+//
+// This test is expected to fail under -race (which implicitly enables
+// -d=checkptr) if the runtime views the header types as incompatible with the
+// underlying built-in types.
+func TestWriteThroughHeader(t *testing.T) {
+	t.Run("Slice", func(t *testing.T) {
+		s := []byte("Hello, checkptr!")[:5] // len is 5 while cap is larger, so both get checked
+
+		var alias []byte
+		hdr := (*unsafeheader.Slice)(unsafe.Pointer(&alias)) // view alias through the header type
+		hdr.Data = unsafe.Pointer(&s[0])
+		hdr.Cap = cap(s)
+		hdr.Len = len(s)
+
+		if !bytes.Equal(alias, s) {
+			t.Errorf("alias of %T(%q) constructed via Slice = %T(%q)", s, s, alias, alias)
+		}
+		if cap(alias) != cap(s) {
+			t.Errorf("alias of %T with cap %d has cap %d", s, cap(s), cap(alias))
+		}
+	})
+
+	t.Run("String", func(t *testing.T) {
+		s := "Hello, checkptr!"
+
+		var alias string
+		hdr := (*unsafeheader.String)(unsafe.Pointer(&alias)) // view alias through the header type
+		hdr.Data = (*unsafeheader.String)(unsafe.Pointer(&s)).Data
+		hdr.Len = len(s)
+
+		if alias != s {
+			t.Errorf("alias of %q constructed via String = %q", s, alias)
+		}
+	})
+}
diff --git a/src/internal/xcoff/ar.go b/src/internal/xcoff/ar.go
new file mode 100644
index 0000000..0fb410f
--- /dev/null
+++ b/src/internal/xcoff/ar.go
@@ -0,0 +1,228 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xcoff
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+)
+
+const (
+	SAIAMAG   = 0x8         // length in bytes of the archive magic string
+	AIAFMAG   = "`\n"       // terminator expected after each member header and name
+	AIAMAG    = "<aiaff>\n" // magic of a small archive (rejected by NewArchive)
+	AIAMAGBIG = "<bigaf>\n" // magic of a big archive
+
+	// Header sizes in bytes.
+	FL_HSZ_BIG = 0x80 // matches the total size of bigarFileHeader's fields
+	AR_HSZ_BIG = 0x70 // matches the total size of bigarMemberHeader's fields
+)
+
+type bigarFileHeader struct {
+	Flmagic    [SAIAMAG]byte // Archive magic string
+	Flmemoff   [20]byte      // Member table offset
+	Flgstoff   [20]byte      // 32-bit global symbol table offset
+	Flgst64off [20]byte      // 64-bit global symbol table offset
+	Flfstmoff  [20]byte      // First member offset, parsed as decimal text (0 if the archive is empty)
+	Fllstmoff  [20]byte      // Last member offset, parsed as decimal text
+	Flfreeoff  [20]byte      // Offset of the first member on the free list
+}
+
+type bigarMemberHeader struct {
+	Arsize   [20]byte // File member size, parsed as decimal text
+	Arnxtmem [20]byte // Next member offset, parsed as decimal text
+	Arprvmem [20]byte // Previous member pointer
+	Ardate   [12]byte // File member date
+	Aruid    [12]byte // File member uid
+	Argid    [12]byte // File member gid
+	Armode   [12]byte // File member mode (octal)
+	Arnamlen [4]byte  // File member name length, parsed as decimal text
+	// _ar_nam (the name itself) is left out; NewArchive reads the name separately using Arnamlen.
+}
+
+// Archive represents an open AIX big archive.
+type Archive struct {
+	ArchiveHeader
+	Members []*Member // members in the order NewArchive read them
+
+	closer io.Closer // set by OpenArchive and released by Close; nil otherwise
+}
+
+// ArchiveHeader holds information about a big archive file header.
+type ArchiveHeader struct {
+	magic string // magic string read from the start of the archive
+}
+
+// Member represents a member of an AIX big archive.
+type Member struct {
+	MemberHeader
+	sr *io.SectionReader // reads the member's data inside the archive
+}
+
+// MemberHeader holds information about a big archive member.
+type MemberHeader struct {
+	Name string // member name
+	Size uint64 // member size taken from the header's Arsize field
+}
+
+// OpenArchive opens the file called name with os.Open and parses it as an
+// AIX big archive. The file stays open until Close is called on the result.
+func OpenArchive(name string) (*Archive, error) {
+	file, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+	archive, err := NewArchive(file)
+	if err == nil {
+		archive.closer = file // Close will release the file
+		return archive, nil
+	}
+	file.Close() // parsing failed: do not leak the descriptor
+	return nil, err
+}
+
+// Close closes the Archive.
+// If the Archive was created using NewArchive directly instead of OpenArchive,
+// Close has no effect.
+func (a *Archive) Close() error {
+	var err error
+	if a.closer != nil {
+		err = a.closer.Close()
+		a.closer = nil
+	}
+	return err
+}
+
+// NewArchive creates a new Archive for accessing an AIX big archive in an underlying reader.
+// It fails on any other magic and on header fields that are malformed or negative.
+func NewArchive(r io.ReaderAt) (*Archive, error) {
+	// Numeric header fields are parsed as decimal text padded with whitespace.
+	parseDecimalBytes := func(b []byte) (int64, error) {
+		return strconv.ParseInt(strings.TrimSpace(string(b)), 10, 64)
+	}
+	sr := io.NewSectionReader(r, 0, 1<<63-1)
+
+	// Read File Header
+	var magic [SAIAMAG]byte
+	if _, err := sr.ReadAt(magic[:], 0); err != nil {
+		return nil, err
+	}
+
+	arch := new(Archive)
+	switch string(magic[:]) {
+	case AIAMAGBIG:
+		arch.magic = string(magic[:])
+	case AIAMAG:
+		return nil, fmt.Errorf("small AIX archive not supported")
+	default:
+		return nil, fmt.Errorf("unrecognised archive magic: 0x%x", magic)
+	}
+
+	var fhdr bigarFileHeader
+	if _, err := sr.Seek(0, io.SeekStart); err != nil {
+		return nil, err
+	}
+	if err := binary.Read(sr, binary.BigEndian, &fhdr); err != nil {
+		return nil, err
+	}
+
+	off, err := parseDecimalBytes(fhdr.Flfstmoff[:])
+	if err != nil {
+		return nil, fmt.Errorf("error parsing offset of first member in archive header(%q); %v", fhdr, err)
+	}
+	if off == 0 {
+		// Occurs if the archive is empty.
+		return arch, nil
+	}
+
+	lastoff, err := parseDecimalBytes(fhdr.Fllstmoff[:])
+	if err != nil {
+		return nil, fmt.Errorf("error parsing offset of last member in archive header(%q); %v", fhdr, err)
+	}
+
+	// Read members
+	for {
+		// Read Member Header
+		// The member header is normally 2 bytes larger. But it's easier
+		// to read the name if the header is read without _ar_nam.
+		// However, AIAFMAG must be read afterward.
+		if _, err := sr.Seek(off, io.SeekStart); err != nil {
+			return nil, err
+		}
+		var mhdr bigarMemberHeader
+		if err := binary.Read(sr, binary.BigEndian, &mhdr); err != nil {
+			return nil, err
+		}
+
+		size, err := parseDecimalBytes(mhdr.Arsize[:])
+		if err != nil {
+			return nil, fmt.Errorf("error parsing size in member header(%q); %v", mhdr, err)
+		}
+		namlen, err := parseDecimalBytes(mhdr.Arnamlen[:])
+		if err != nil {
+			return nil, fmt.Errorf("error parsing name length in member header(%q); %v", mhdr, err)
+		}
+		// Reject negative values: they would wrap Size and make the allocation below panic.
+		if size < 0 || namlen < 0 {
+			return nil, fmt.Errorf("invalid size (%d) or name length (%d) in member header", size, namlen)
+		}
+
+		// Read name
+		name := make([]byte, namlen)
+		if err := binary.Read(sr, binary.BigEndian, name); err != nil {
+			return nil, err
+		}
+		member := &Member{MemberHeader: MemberHeader{Name: string(name), Size: uint64(size)}}
+		arch.Members = append(arch.Members, member)
+
+		// Header plus name is padded to an even offset; skip the pad byte if there is one.
+		fileoff := off + AR_HSZ_BIG + namlen
+		if fileoff&1 != 0 {
+			fileoff++
+			if _, err := sr.Seek(1, io.SeekCurrent); err != nil {
+				return nil, err
+			}
+		}
+
+		// Read AIAFMAG string
+		var fmag [2]byte
+		if err := binary.Read(sr, binary.BigEndian, &fmag); err != nil {
+			return nil, err
+		}
+		if string(fmag[:]) != AIAFMAG {
+			return nil, fmt.Errorf("AIAFMAG not found after member header")
+		}
+
+		fileoff += 2 // Add the two bytes of AIAFMAG
+		member.sr = io.NewSectionReader(sr, fileoff, size)
+
+		if off == lastoff {
+			break
+		}
+		off, err = parseDecimalBytes(mhdr.Arnxtmem[:])
+		if err != nil {
+			return nil, fmt.Errorf("error parsing offset of next member in member header(%q); %v", mhdr, err)
+		}
+	}
+
+	return arch, nil
+}
+
+// GetFile returns the XCOFF file stored in the first member called name.
+// FIXME: This doesn't work if an archive has two members with the same
+// name, which can occur if an archive has both 32-bit and 64-bit files:
+// only the first such member is ever returned.
+func (a *Archive) GetFile(name string) (*File, error) {
+	for i := range a.Members {
+		if m := a.Members[i]; m.Name == name {
+			return NewFile(m.sr)
+		}
+	}
+	return nil, fmt.Errorf("unknown member %s in archive", name)
+}
diff --git a/src/internal/xcoff/ar_test.go b/src/internal/xcoff/ar_test.go
new file mode 100644
index 0000000..83333d6
--- /dev/null
+++ b/src/internal/xcoff/ar_test.go
@@ -0,0 +1,79 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xcoff
+
+import (
+	"reflect"
+	"testing"
+)
+
+type archiveTest struct {
+	file              string          // path of the archive to open
+	hdr               ArchiveHeader   // expected archive header
+	members           []*MemberHeader // expected member headers, in archive order
+	membersFileHeader []FileHeader    // expected XCOFF file header of each member
+}
+
+var archTest = []archiveTest{
+	{
+		file: "testdata/bigar-ppc64",
+		hdr:  ArchiveHeader{magic: AIAMAGBIG},
+		members: []*MemberHeader{
+			{Name: "printbye.o", Size: 836},
+			{Name: "printhello.o", Size: 860},
+		},
+		membersFileHeader: []FileHeader{
+			{TargetMachine: U64_TOCMAGIC},
+			{TargetMachine: U64_TOCMAGIC},
+		},
+	},
+	{
+		file:              "testdata/bigar-empty",
+		hdr:               ArchiveHeader{magic: AIAMAGBIG},
+		members:           []*MemberHeader{},
+		membersFileHeader: []FileHeader{},
+	},
+}
+
+func TestOpenArchive(t *testing.T) {
+	for i := range archTest {
+		tt := &archTest[i]
+		arch, err := OpenArchive(tt.file)
+		if err != nil {
+			t.Error(err)
+			continue
+		}
+		if !reflect.DeepEqual(arch.ArchiveHeader, tt.hdr) {
+			t.Errorf("open archive %s:\n\thave %#v\n\twant %#v\n", tt.file, arch.ArchiveHeader, tt.hdr)
+			arch.Close() // best effort; the mismatch is already reported
+			continue
+		}
+		for i, mem := range arch.Members {
+			if i >= len(tt.members) {
+				break
+			}
+			have := &mem.MemberHeader
+			want := tt.members[i]
+			if !reflect.DeepEqual(have, want) {
+				t.Errorf("open %s, member %d:\n\thave %#v\n\twant %#v\n", tt.file, i, have, want)
+			}
+			f, err := arch.GetFile(mem.Name)
+			if err != nil {
+				t.Error(err)
+				continue
+			}
+			if !reflect.DeepEqual(f.FileHeader, tt.membersFileHeader[i]) {
+				t.Errorf("open %s, member file header %d:\n\thave %#v\n\twant %#v\n", tt.file, i, f.FileHeader, tt.membersFileHeader[i])
+			}
+		}
+		if tn, an := len(tt.members), len(arch.Members); tn != an {
+			t.Errorf("open %s: len(Members) = %d, want %d", tt.file, an, tn)
+		}
+		// Release the file opened by OpenArchive so descriptors do not pile up across cases.
+		if err := arch.Close(); err != nil {
+			t.Errorf("close %s: %v", tt.file, err)
+		}
+	}
+}
diff --git a/src/internal/xcoff/file.go b/src/internal/xcoff/file.go
new file mode 100644
index 0000000..05e4fd5
--- /dev/null
+++ b/src/internal/xcoff/file.go
@@ -0,0 +1,687 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package xcoff implements access to XCOFF (Extended Common Object File Format) files.
+package xcoff
+
+import (
+	"debug/dwarf"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+)
+
+// SectionHeader holds information about an XCOFF section header.
+type SectionHeader struct {
+	Name           string // section name as stored in the header (at most 8 bytes)
+	VirtualAddress uint64 // virtual address of the section
+	Size           uint64 // section size
+	Type           uint32 // section flags (STYP_* values, combined with SSUBTYP_* for DWARF)
+	Relptr         uint64 // offset in the file to the section's relocation entries
+	Nreloc         uint32 // number of relocation entries
+}
+
+type Section struct {
+	SectionHeader
+	Relocs []Reloc
+	io.ReaderAt
+	sr *io.SectionReader
+}
+
+// AuxiliaryCSect holds information about an XCOFF symbol in an AUX_CSECT entry.
+type AuxiliaryCSect struct {
+	Length              int64 // Xscnlen: length or symbol table index
+	StorageMappingClass int   // Xsmclas: storage-mapping class (XMC_* values)
+	SymbolType          int   // low 3 bits of Xsmtyp (XTY_* values)
+}
+
+// AuxiliaryFcn holds information about an XCOFF symbol in an AUX_FCN entry.
+type AuxiliaryFcn struct {
+	Size int64
+}
+
+type Symbol struct {
+	Name          string         // symbol name (stored inline or read from the string table)
+	Value         uint64         // value minus its section's virtual address; 0 if SectionNumber is 0
+	SectionNumber int            // 1-based index into File.Sections; 0 (N_UNDEF) means no section
+	StorageClass  int            // storage class (C_* values)
+	AuxFcn        AuxiliaryFcn   // function size; left zero when no AUX_FCN entry was read
+	AuxCSect      AuxiliaryCSect // contents of the symbol's csect auxiliary entry
+}
+
+type Reloc struct {
+	VirtualAddress   uint64  // Rvaddr field of the relocation entry
+	Symbol           *Symbol // symbol at index Rsymndx; nil if NewFile did not keep that symbol
+	Signed           bool    // bit 0x80 of Rsize
+	InstructionFixed bool    // bit 0x40 of Rsize
+	Length           uint8   // low 6 bits of Rsize, plus one
+	Type             uint8   // Rtype field of the relocation entry
+}
+
+// ImportedSymbol holds information about an imported XCOFF symbol.
+type ImportedSymbol struct {
+	Name    string
+	Library string
+}
+
+// FileHeader holds information about an XCOFF file header.
+type FileHeader struct {
+	TargetMachine uint16
+}
+
+// A File represents an open XCOFF file.
+type File struct {
+	FileHeader
+	Sections     []*Section // one entry per section header, in file order
+	Symbols      []*Symbol  // C_EXT, C_WEAKEXT and C_HIDEXT symbols that have a csect auxiliary entry
+	StringTable  []byte     // raw string table, 4-byte length prefix included; nil when it holds no strings
+	LibraryPaths []string   // default LIBPATH entries; set when the import file IDs are read
+
+	closer io.Closer // set by Open; nil for a File built directly with NewFile
+}
+
+// Open opens the file called name with os.Open and parses it as an XCOFF binary.
+func Open(name string) (*File, error) {
+	osFile, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+	xf, parseErr := NewFile(osFile)
+	if parseErr != nil {
+		osFile.Close() // parsing failed: do not leak the descriptor
+		return nil, parseErr
+	}
+	xf.closer = osFile // released later by File.Close
+	return xf, nil
+}
+
+// Close releases the underlying file opened by Open and reports its close
+// error. A File built directly with NewFile has nothing to release, so
+// Close returns nil; the same holds for repeated calls.
+func (f *File) Close() error {
+	c := f.closer
+	if c == nil {
+		return nil
+	}
+	f.closer = nil
+	return c.Close()
+}
+
+// Section returns the first section called name, or nil when there is none.
+// XCOFF limits section names to 8 bytes, so a section whose stored name is
+// the first 8 bytes of a longer name (e.g. .gosymtab) also matches.
+func (f *File) Section(name string) *Section {
+	for _, sec := range f.Sections {
+		truncated := len(name) > 8 && sec.Name == name[:8]
+		if truncated || sec.Name == name {
+			return sec
+		}
+	}
+	return nil
+}
+
+// SectionByType scans f.Sections in order and returns the first one whose
+// Type equals typ exactly; it returns nil when no section matches.
+func (f *File) SectionByType(typ uint32) *Section {
+	for i := range f.Sections {
+		if sec := f.Sections[i]; sec.Type == typ {
+			return sec
+		}
+	}
+	return nil
+}
+
+// cstring converts b to a string, stopping at the first 0 byte or at the end of b.
+func cstring(b []byte) string {
+	end := 0
+	for end < len(b) && b[end] != 0 {
+		end++
+	}
+	return string(b[:end])
+}
+
+// getString reads the string at offset in st; offsets below 4 or past st fail.
+func getString(st []byte, offset uint32) (string, bool) {
+	if inRange := offset >= 4 && int(offset) < len(st); !inRange {
+		return "", false
+	}
+	return cstring(st[offset:]), true
+}
+
+// NewFile creates a new File for accessing an XCOFF binary in an underlying reader.
+func NewFile(r io.ReaderAt) (*File, error) {
+	sr := io.NewSectionReader(r, 0, 1<<63-1)
+	// Read XCOFF target machine (the magic selects the 32- or 64-bit layout).
+	var magic uint16
+	if err := binary.Read(sr, binary.BigEndian, &magic); err != nil {
+		return nil, err
+	}
+	if magic != U802TOCMAGIC && magic != U64_TOCMAGIC {
+		return nil, fmt.Errorf("unrecognised XCOFF magic: 0x%x", magic)
+	}
+
+	f := new(File)
+	f.TargetMachine = magic
+
+	// Read XCOFF file header (rewind first: the magic is its first field).
+	if _, err := sr.Seek(0, io.SeekStart); err != nil {
+		return nil, err
+	}
+	var nscns uint16
+	var symptr uint64
+	var nsyms int32
+	var opthdr uint16
+	var hdrsz int
+	switch f.TargetMachine {
+	case U802TOCMAGIC:
+		fhdr := new(FileHeader32)
+		if err := binary.Read(sr, binary.BigEndian, fhdr); err != nil {
+			return nil, err
+		}
+		nscns = fhdr.Fnscns
+		symptr = uint64(fhdr.Fsymptr)
+		nsyms = fhdr.Fnsyms
+		opthdr = fhdr.Fopthdr
+		hdrsz = FILHSZ_32
+	case U64_TOCMAGIC:
+		fhdr := new(FileHeader64)
+		if err := binary.Read(sr, binary.BigEndian, fhdr); err != nil {
+			return nil, err
+		}
+		nscns = fhdr.Fnscns
+		symptr = fhdr.Fsymptr
+		nsyms = fhdr.Fnsyms
+		opthdr = fhdr.Fopthdr
+		hdrsz = FILHSZ_64
+	}
+
+	if symptr == 0 || nsyms <= 0 {
+		return nil, fmt.Errorf("no symbol table")
+	}
+
+	// Read string table (located right after symbol table).
+	offset := symptr + uint64(nsyms)*SYMESZ
+	if _, err := sr.Seek(int64(offset), io.SeekStart); err != nil {
+		return nil, err
+	}
+	// The first 4 bytes contain the length (in bytes).
+	var l uint32
+	if err := binary.Read(sr, binary.BigEndian, &l); err != nil {
+		return nil, err
+	}
+	if l > 4 {
+		if _, err := sr.Seek(int64(offset), io.SeekStart); err != nil {
+			return nil, err
+		}
+		f.StringTable = make([]byte, l)
+		if _, err := io.ReadFull(sr, f.StringTable); err != nil {
+			return nil, err
+		}
+	}
+
+	// Read section headers (they follow the file header and the optional header).
+	if _, err := sr.Seek(int64(hdrsz)+int64(opthdr), io.SeekStart); err != nil {
+		return nil, err
+	}
+	f.Sections = make([]*Section, nscns)
+	for i := 0; i < int(nscns); i++ {
+		var scnptr uint64
+		s := new(Section)
+		switch f.TargetMachine {
+		case U802TOCMAGIC:
+			shdr := new(SectionHeader32)
+			if err := binary.Read(sr, binary.BigEndian, shdr); err != nil {
+				return nil, err
+			}
+			s.Name = cstring(shdr.Sname[:])
+			s.VirtualAddress = uint64(shdr.Svaddr)
+			s.Size = uint64(shdr.Ssize)
+			scnptr = uint64(shdr.Sscnptr)
+			s.Type = shdr.Sflags
+			s.Relptr = uint64(shdr.Srelptr)
+			s.Nreloc = uint32(shdr.Snreloc)
+		case U64_TOCMAGIC:
+			shdr := new(SectionHeader64)
+			if err := binary.Read(sr, binary.BigEndian, shdr); err != nil {
+				return nil, err
+			}
+			s.Name = cstring(shdr.Sname[:])
+			s.VirtualAddress = shdr.Svaddr
+			s.Size = shdr.Ssize
+			scnptr = shdr.Sscnptr
+			s.Type = shdr.Sflags
+			s.Relptr = shdr.Srelptr
+			s.Nreloc = shdr.Snreloc
+		}
+		r2 := r
+		if scnptr == 0 { // .bss must have all 0s
+			r2 = zeroReaderAt{}
+		}
+		s.sr = io.NewSectionReader(r2, int64(scnptr), int64(s.Size))
+		s.ReaderAt = s.sr
+		f.Sections[i] = s
+	}
+
+	// Symbol map needed by relocation (symbol table index -> retained symbol)
+	var idxToSym = make(map[int]*Symbol)
+
+	// Read symbol table
+	if _, err := sr.Seek(int64(symptr), io.SeekStart); err != nil {
+		return nil, err
+	}
+	f.Symbols = make([]*Symbol, 0)
+	for i := 0; i < int(nsyms); i++ {
+		var numaux int
+		var ok, needAuxFcn bool
+		sym := new(Symbol)
+		switch f.TargetMachine {
+		case U802TOCMAGIC:
+			se := new(SymEnt32)
+			if err := binary.Read(sr, binary.BigEndian, se); err != nil {
+				return nil, err
+			}
+			numaux = int(uint8(se.Nnumaux)) // read as unsigned so the skip below never moves backwards
+			sym.SectionNumber = int(se.Nscnum)
+			sym.StorageClass = int(se.Nsclass)
+			sym.Value = uint64(se.Nvalue)
+			needAuxFcn = se.Ntype&SYM_TYPE_FUNC != 0 && numaux > 1
+			zeroes := binary.BigEndian.Uint32(se.Nname[:4])
+			if zeroes != 0 {
+				sym.Name = cstring(se.Nname[:])
+			} else {
+				offset := binary.BigEndian.Uint32(se.Nname[4:])
+				sym.Name, ok = getString(f.StringTable, offset)
+				if !ok {
+					goto skip
+				}
+			}
+		case U64_TOCMAGIC:
+			se := new(SymEnt64)
+			if err := binary.Read(sr, binary.BigEndian, se); err != nil {
+				return nil, err
+			}
+			numaux = int(uint8(se.Nnumaux)) // read as unsigned so the skip below never moves backwards
+			sym.SectionNumber = int(se.Nscnum)
+			sym.StorageClass = int(se.Nsclass)
+			sym.Value = se.Nvalue
+			needAuxFcn = se.Ntype&SYM_TYPE_FUNC != 0 && numaux > 1
+			sym.Name, ok = getString(f.StringTable, se.Noffset)
+			if !ok {
+				goto skip
+			}
+		}
+		if sym.StorageClass != C_EXT && sym.StorageClass != C_WEAKEXT && sym.StorageClass != C_HIDEXT {
+			goto skip
+		}
+		// Must have at least one csect auxiliary entry.
+		if numaux < 1 || i+numaux >= int(nsyms) {
+			goto skip
+		}
+
+		if sym.SectionNumber > int(nscns) {
+			goto skip
+		}
+		if sym.SectionNumber == 0 {
+			sym.Value = 0
+		} else if sym.SectionNumber > 0 { // N_ABS and N_DEBUG are negative and index no section
+			sym.Value -= f.Sections[sym.SectionNumber-1].VirtualAddress
+		}
+
+		idxToSym[i] = sym
+
+		// If this symbol is a function, it must retrieve its size from
+		// its AUX_FCN entry.
+		// It can happen that a function symbol doesn't have any AUX_FCN.
+		// In this case, needAuxFcn is false and their size will be set to 0.
+		if needAuxFcn {
+			switch f.TargetMachine {
+			case U802TOCMAGIC:
+				aux := new(AuxFcn32)
+				if err := binary.Read(sr, binary.BigEndian, aux); err != nil {
+					return nil, err
+				}
+				sym.AuxFcn.Size = int64(aux.Xfsize)
+			case U64_TOCMAGIC:
+				aux := new(AuxFcn64)
+				if err := binary.Read(sr, binary.BigEndian, aux); err != nil {
+					return nil, err
+				}
+				sym.AuxFcn.Size = int64(aux.Xfsize)
+			}
+		}
+
+		// Read csect auxiliary entry (by convention, it is the last).
+		if !needAuxFcn {
+			if _, err := sr.Seek(int64(numaux-1)*SYMESZ, io.SeekCurrent); err != nil {
+				return nil, err
+			}
+		}
+		i += numaux
+		numaux = 0
+		switch f.TargetMachine {
+		case U802TOCMAGIC:
+			aux := new(AuxCSect32)
+			if err := binary.Read(sr, binary.BigEndian, aux); err != nil {
+				return nil, err
+			}
+			sym.AuxCSect.SymbolType = int(aux.Xsmtyp & 0x7)
+			sym.AuxCSect.StorageMappingClass = int(aux.Xsmclas)
+			sym.AuxCSect.Length = int64(aux.Xscnlen)
+		case U64_TOCMAGIC:
+			aux := new(AuxCSect64)
+			if err := binary.Read(sr, binary.BigEndian, aux); err != nil {
+				return nil, err
+			}
+			sym.AuxCSect.SymbolType = int(aux.Xsmtyp & 0x7)
+			sym.AuxCSect.StorageMappingClass = int(aux.Xsmclas)
+			sym.AuxCSect.Length = int64(aux.Xscnlenhi)<<32 | int64(aux.Xscnlenlo)
+		}
+		f.Symbols = append(f.Symbols, sym)
+	skip:
+		i += numaux // Skip auxiliary entries
+		if _, err := sr.Seek(int64(numaux)*SYMESZ, io.SeekCurrent); err != nil {
+			return nil, err
+		}
+	}
+
+	// Read relocations
+	// Only for .data or .text section
+	for _, sect := range f.Sections {
+		if sect.Type != STYP_TEXT && sect.Type != STYP_DATA {
+			continue
+		}
+		sect.Relocs = make([]Reloc, sect.Nreloc)
+		if sect.Relptr == 0 {
+			continue
+		}
+		if _, err := sr.Seek(int64(sect.Relptr), io.SeekStart); err != nil {
+			return nil, err
+		}
+		for i := uint32(0); i < sect.Nreloc; i++ {
+			switch f.TargetMachine {
+			case U802TOCMAGIC:
+				rel := new(Reloc32)
+				if err := binary.Read(sr, binary.BigEndian, rel); err != nil {
+					return nil, err
+				}
+				sect.Relocs[i].VirtualAddress = uint64(rel.Rvaddr)
+				sect.Relocs[i].Symbol = idxToSym[int(rel.Rsymndx)]
+				sect.Relocs[i].Type = rel.Rtype
+				sect.Relocs[i].Length = rel.Rsize&0x3F + 1
+
+				if rel.Rsize&0x80 != 0 {
+					sect.Relocs[i].Signed = true
+				}
+				if rel.Rsize&0x40 != 0 {
+					sect.Relocs[i].InstructionFixed = true
+				}
+
+			case U64_TOCMAGIC:
+				rel := new(Reloc64)
+				if err := binary.Read(sr, binary.BigEndian, rel); err != nil {
+					return nil, err
+				}
+				sect.Relocs[i].VirtualAddress = rel.Rvaddr
+				sect.Relocs[i].Symbol = idxToSym[int(rel.Rsymndx)]
+				sect.Relocs[i].Type = rel.Rtype
+				sect.Relocs[i].Length = rel.Rsize&0x3F + 1
+				if rel.Rsize&0x80 != 0 {
+					sect.Relocs[i].Signed = true
+				}
+				if rel.Rsize&0x40 != 0 {
+					sect.Relocs[i].InstructionFixed = true
+				}
+			}
+		}
+	}
+
+	return f, nil
+}
+
+// zeroReaderAt is an io.ReaderAt whose content is all zero bytes.
+type zeroReaderAt struct{}
+
+// ReadAt fills p with zeros whatever off is, and never fails.
+func (w zeroReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
+	for n = 0; n < len(p); n++ {
+		p[n] = 0
+	}
+	return n, nil
+}
+
+// Data reads the whole XCOFF section s; a short read returns the partial data and the error.
+func (s *Section) Data() ([]byte, error) {
+	buf := make([]byte, s.sr.Size())
+	got, err := s.sr.ReadAt(buf, 0)
+	if got < len(buf) {
+		return buf[:got], err
+	}
+	return buf, nil // a complete read succeeds whatever error ReadAt reported with it
+}
+
+// CSect returns the contents of the first csect (XTY_SD symbol) called name, or nil if it is absent, out of bounds or unreadable.
+func (f *File) CSect(name string) []byte {
+	for _, sym := range f.Symbols {
+		if sym.Name == name && sym.AuxCSect.SymbolType == XTY_SD {
+			if i := sym.SectionNumber - 1; 0 <= i && i < len(f.Sections) {
+				s := f.Sections[i]
+				// A negative Length would panic in make; comparing by subtraction avoids uint64 wraparound.
+				if n := sym.AuxCSect.Length; n >= 0 && sym.Value <= s.Size && uint64(n) <= s.Size-sym.Value {
+					dat := make([]byte, n)
+					if _, err := s.sr.ReadAt(dat, int64(sym.Value)); err != nil {
+						return nil
+					}
+					return dat
+				}
+			}
+			break
+		}
+	}
+	return nil
+}
+
+// DWARF returns the DWARF debug information of f, built from the abbrev,
+// info, line, ranges and str sections; a missing section is passed as nil.
+func (f *File) DWARF() (*dwarf.Data, error) {
+	// Other DWARF sections exist, but debug/dwarf only consumes these,
+	// so nothing else is loaded. The order is the one used below.
+	wanted := [...]uint32{SSUBTYP_DWABREV, SSUBTYP_DWINFO, SSUBTYP_DWLINE, SSUBTYP_DWRNGES, SSUBTYP_DWSTR}
+	var raw [len(wanted)][]byte
+	for idx := range wanted {
+		sec := f.SectionByType(STYP_DWARF | wanted[idx])
+		if sec == nil {
+			continue
+		}
+		data, err := sec.Data()
+		if err != nil && uint64(len(data)) < sec.Size {
+			return nil, err
+		}
+		raw[idx] = data
+	}
+	return dwarf.New(raw[0], nil, nil, raw[1], raw[2], nil, raw[3], raw[4])
+}
+
+// readImportIDs returns the import file IDs stored inside the .loader section s and sets f.LibraryPaths.
+// Each ID is formatted as path/base/member, or base/member when its path is empty.
+func (f *File) readImportIDs(s *Section) ([]string, error) {
+	// Read loader header
+	if _, err := s.sr.Seek(0, io.SeekStart); err != nil {
+		return nil, err
+	}
+	var istlen uint32
+	var nimpid int32
+	var impoff uint64
+	switch f.TargetMachine {
+	case U802TOCMAGIC:
+		lhdr := new(LoaderHeader32)
+		if err := binary.Read(s.sr, binary.BigEndian, lhdr); err != nil {
+			return nil, err
+		}
+		istlen = lhdr.Listlen
+		nimpid = lhdr.Lnimpid
+		impoff = uint64(lhdr.Limpoff)
+	case U64_TOCMAGIC:
+		lhdr := new(LoaderHeader64)
+		if err := binary.Read(s.sr, binary.BigEndian, lhdr); err != nil {
+			return nil, err
+		}
+		istlen = lhdr.Listlen
+		nimpid = lhdr.Lnimpid
+		impoff = lhdr.Limpoff
+	}
+
+	// Read loader import file ID table
+	if _, err := s.sr.Seek(int64(impoff), io.SeekStart); err != nil {
+		return nil, err
+	}
+	table := make([]byte, istlen)
+	if _, err := io.ReadFull(s.sr, table); err != nil {
+		return nil, err
+	}
+
+	offset := 0
+	// First import file ID is the default LIBPATH value
+	libpath := cstring(table[offset:])
+	f.LibraryPaths = strings.Split(libpath, ":")
+	offset += len(libpath) + 3 // 3 null bytes
+	all := make([]string, 0)
+	for i := 1; i < int(nimpid); i++ {
+		var part [3]string // path, base and member, each NUL-terminated
+		for j := range part {
+			if offset > len(table) { // fewer strings than nimpid announces: fail instead of panicking
+				return nil, fmt.Errorf("truncated import file ID table")
+			}
+			part[j] = cstring(table[offset:])
+			offset += len(part[j]) + 1
+		}
+		path := part[1] + "/" + part[2]
+		if len(part[0]) > 0 {
+			path = part[0] + "/" + path
+		}
+		all = append(all, path)
+	}
+
+	return all, nil
+}
+
+// ImportedSymbols returns the names of all symbols
+// referred to by the binary f that are expected to be
+// satisfied by other libraries at dynamic load time.
+// Only loader symbols whose import flag (Lsmtype&0x40) is set are returned.
+func (f *File) ImportedSymbols() ([]ImportedSymbol, error) {
+	s := f.SectionByType(STYP_LOADER)
+	if s == nil {
+		return nil, nil
+	}
+	// Read loader header
+	if _, err := s.sr.Seek(0, io.SeekStart); err != nil {
+		return nil, err
+	}
+	var stlen uint32
+	var stoff uint64
+	var nsyms int32
+	var symoff uint64
+	switch f.TargetMachine {
+	case U802TOCMAGIC:
+		lhdr := new(LoaderHeader32)
+		if err := binary.Read(s.sr, binary.BigEndian, lhdr); err != nil {
+			return nil, err
+		}
+		stlen = lhdr.Lstlen
+		stoff = uint64(lhdr.Lstoff)
+		nsyms = lhdr.Lnsyms
+		symoff = LDHDRSZ_32
+	case U64_TOCMAGIC:
+		lhdr := new(LoaderHeader64)
+		if err := binary.Read(s.sr, binary.BigEndian, lhdr); err != nil {
+			return nil, err
+		}
+		stlen = lhdr.Lstlen
+		stoff = lhdr.Lstoff
+		nsyms = lhdr.Lnsyms
+		symoff = lhdr.Lsymoff
+	}
+
+	// Read loader section string table
+	if _, err := s.sr.Seek(int64(stoff), io.SeekStart); err != nil {
+		return nil, err
+	}
+	st := make([]byte, stlen)
+	if _, err := io.ReadFull(s.sr, st); err != nil {
+		return nil, err
+	}
+
+	// Read imported libraries
+	libs, err := f.readImportIDs(s)
+	if err != nil {
+		return nil, err
+	}
+
+	// Read loader symbol table
+	if _, err := s.sr.Seek(int64(symoff), io.SeekStart); err != nil {
+		return nil, err
+	}
+	all := make([]ImportedSymbol, 0)
+	for i := 0; i < int(nsyms); i++ {
+		var name string
+		var ifile int32
+		var ok bool
+		switch f.TargetMachine {
+		case U802TOCMAGIC:
+			ldsym := new(LoaderSymbol32)
+			if err := binary.Read(s.sr, binary.BigEndian, ldsym); err != nil {
+				return nil, err
+			}
+			if ldsym.Lsmtype&0x40 == 0 {
+				continue // Imported symbols only
+			}
+			zeroes := binary.BigEndian.Uint32(ldsym.Lname[:4])
+			if zeroes != 0 {
+				name = cstring(ldsym.Lname[:])
+			} else {
+				offset := binary.BigEndian.Uint32(ldsym.Lname[4:])
+				name, ok = getString(st, offset)
+				if !ok {
+					continue
+				}
+			}
+			ifile = ldsym.Lifile
+		case U64_TOCMAGIC:
+			ldsym := new(LoaderSymbol64)
+			if err := binary.Read(s.sr, binary.BigEndian, ldsym); err != nil {
+				return nil, err
+			}
+			if ldsym.Lsmtype&0x40 == 0 {
+				continue // Imported symbols only
+			}
+			name, ok = getString(st, ldsym.Loffset)
+			if !ok {
+				continue
+			}
+			ifile = ldsym.Lifile
+		}
+		var sym ImportedSymbol
+		sym.Name = name
+		if ifile >= 1 && int(ifile) <= len(libs) { // Library stays empty when ifile has no entry in libs
+			sym.Library = libs[ifile-1]
+		}
+		all = append(all, sym)
+	}
+
+	return all, nil
+}
+
+// ImportedLibraries lists the import file IDs recorded in the loader
+// section of f, i.e. the libraries the binary expects to be linked with
+// at dynamic link time. Each entry has the form path/base/member or
+// base/member. A file without a loader section yields nil, nil.
+func (f *File) ImportedLibraries() ([]string, error) {
+	if loader := f.SectionByType(STYP_LOADER); loader != nil {
+		return f.readImportIDs(loader)
+	}
+	// No loader section: nothing is imported.
+	return nil, nil
+}
diff --git a/src/internal/xcoff/file_test.go b/src/internal/xcoff/file_test.go
new file mode 100644
index 0000000..a6722e9
--- /dev/null
+++ b/src/internal/xcoff/file_test.go
@@ -0,0 +1,102 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xcoff
+
+import (
+	"reflect"
+	"testing"
+)
+
+type fileTest struct {
+	file     string
+	hdr      FileHeader
+	sections []*SectionHeader
+	needed   []string
+}
+
+var fileTests = []fileTest{
+	{
+		"testdata/gcc-ppc32-aix-dwarf2-exec",
+		FileHeader{U802TOCMAGIC},
+		[]*SectionHeader{
+			{".text", 0x10000290, 0x00000bbd, STYP_TEXT, 0x7ae6, 0x36},
+			{".data", 0x20000e4d, 0x00000437, STYP_DATA, 0x7d02, 0x2b},
+			{".bss", 0x20001284, 0x0000021c, STYP_BSS, 0, 0},
+			{".loader", 0x00000000, 0x000004b3, STYP_LOADER, 0, 0},
+			{".dwline", 0x00000000, 0x000000df, STYP_DWARF | SSUBTYP_DWLINE, 0x7eb0, 0x7},
+			{".dwinfo", 0x00000000, 0x00000314, STYP_DWARF | SSUBTYP_DWINFO, 0x7ef6, 0xa},
+			{".dwabrev", 0x00000000, 0x000000d6, STYP_DWARF | SSUBTYP_DWABREV, 0, 0},
+			{".dwarnge", 0x00000000, 0x00000020, STYP_DWARF | SSUBTYP_DWARNGE, 0x7f5a, 0x2},
+			{".dwloc", 0x00000000, 0x00000074, STYP_DWARF | SSUBTYP_DWLOC, 0, 0},
+			{".debug", 0x00000000, 0x00005e4f, STYP_DEBUG, 0, 0},
+		},
+		[]string{"libc.a/shr.o"},
+	},
+	{
+		"testdata/gcc-ppc64-aix-dwarf2-exec",
+		FileHeader{U64_TOCMAGIC},
+		[]*SectionHeader{
+			{".text", 0x10000480, 0x00000afd, STYP_TEXT, 0x8322, 0x34},
+			{".data", 0x20000f7d, 0x000002f3, STYP_DATA, 0x85fa, 0x25},
+			{".bss", 0x20001270, 0x00000428, STYP_BSS, 0, 0},
+			{".loader", 0x00000000, 0x00000535, STYP_LOADER, 0, 0},
+			{".dwline", 0x00000000, 0x000000b4, STYP_DWARF | SSUBTYP_DWLINE, 0x8800, 0x4},
+			{".dwinfo", 0x00000000, 0x0000036a, STYP_DWARF | SSUBTYP_DWINFO, 0x8838, 0x7},
+			{".dwabrev", 0x00000000, 0x000000b5, STYP_DWARF | SSUBTYP_DWABREV, 0, 0},
+			{".dwarnge", 0x00000000, 0x00000040, STYP_DWARF | SSUBTYP_DWARNGE, 0x889a, 0x2},
+			{".dwloc", 0x00000000, 0x00000062, STYP_DWARF | SSUBTYP_DWLOC, 0, 0},
+			{".debug", 0x00000000, 0x00006605, STYP_DEBUG, 0, 0},
+		},
+		[]string{"libc.a/shr_64.o"},
+	},
+}
+
+// TestOpen checks the file header, section headers and imported libraries that Open reports for each entry of fileTests.
+func TestOpen(t *testing.T) {
+	for i := range fileTests {
+		tt := &fileTests[i]
+		f, err := Open(tt.file)
+		if err != nil {
+			t.Error(err)
+			continue
+		}
+		if !reflect.DeepEqual(f.FileHeader, tt.hdr) {
+			t.Errorf("open %s:\n\thave %#v\n\twant %#v\n", tt.file, f.FileHeader, tt.hdr)
+			continue
+		}
+
+		for i, sh := range f.Sections { // this i shadows the outer table index
+			if i >= len(tt.sections) {
+				break // surplus sections are reported by the length check below
+			}
+			have := &sh.SectionHeader
+			want := tt.sections[i]
+			if !reflect.DeepEqual(have, want) {
+				t.Errorf("open %s, section %d:\n\thave %#v\n\twant %#v\n", tt.file, i, have, want)
+			}
+		}
+		tn := len(tt.sections)
+		fn := len(f.Sections)
+		if tn != fn {
+			t.Errorf("open %s: len(Sections) = %d, want %d", tt.file, fn, tn)
+		}
+		tl := tt.needed                 // want
+		fl, err := f.ImportedLibraries() // have
+		if err != nil {
+			t.Error(err)
+		}
+		if !reflect.DeepEqual(tl, fl) {
+			t.Errorf("open %s: loader import = %v, want %v", tt.file, fl, tl)
+		}
+	}
+}
+
+func TestOpenFailure(t *testing.T) {
+	filename := "file.go"    // not an XCOFF object file
+	_, err := Open(filename) // don't crash
+	if err == nil {
+		t.Errorf("open %s: succeeded unexpectedly", filename)
+	}
+}
diff --git a/src/internal/xcoff/testdata/bigar-empty b/src/internal/xcoff/testdata/bigar-empty
new file mode 100644
index 0000000..851ccc5
--- /dev/null
+++ b/src/internal/xcoff/testdata/bigar-empty
@@ -0,0 +1,2 @@
+<bigaf>
+0                   0                   0                   0                   0                   0                   
+\ No newline at end of file
diff --git a/src/internal/xcoff/testdata/bigar-ppc64 b/src/internal/xcoff/testdata/bigar-ppc64
new file mode 100644
index 0000000..a8d4979
--- /dev/null
+++ b/src/internal/xcoff/testdata/bigar-ppc64
diff --git a/src/internal/xcoff/testdata/gcc-ppc32-aix-dwarf2-exec b/src/internal/xcoff/testdata/gcc-ppc32-aix-dwarf2-exec
new file mode 100644
index 0000000..810e21a
--- /dev/null
+++ b/src/internal/xcoff/testdata/gcc-ppc32-aix-dwarf2-exec
diff --git a/src/internal/xcoff/testdata/gcc-ppc64-aix-dwarf2-exec b/src/internal/xcoff/testdata/gcc-ppc64-aix-dwarf2-exec
new file mode 100644
index 0000000..707d01e
--- /dev/null
+++ b/src/internal/xcoff/testdata/gcc-ppc64-aix-dwarf2-exec
diff --git a/src/internal/xcoff/testdata/hello.c b/src/internal/xcoff/testdata/hello.c
new file mode 100644
index 0000000..34d9ee7
--- /dev/null
+++ b/src/internal/xcoff/testdata/hello.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+
+void
+main(int argc, char *argv[])
+{
+	printf("hello, world\n");
+}
diff --git a/src/internal/xcoff/testdata/printbye.c b/src/internal/xcoff/testdata/printbye.c
new file mode 100644
index 0000000..9045079
--- /dev/null
+++ b/src/internal/xcoff/testdata/printbye.c
@@ -0,0 +1,5 @@
+#include <stdio.h>
+
+void printbye(){
+	printf("Goodbye\n");
+}
diff --git a/src/internal/xcoff/testdata/printhello.c b/src/internal/xcoff/testdata/printhello.c
new file mode 100644
index 0000000..182aa09
--- /dev/null
+++ b/src/internal/xcoff/testdata/printhello.c
@@ -0,0 +1,5 @@
+#include <stdio.h>
+
+void printhello(){
+	printf("Helloworld\n");
+}
diff --git a/src/internal/xcoff/xcoff.go b/src/internal/xcoff/xcoff.go
new file mode 100644
index 0000000..f8465d7
--- /dev/null
+++ b/src/internal/xcoff/xcoff.go
@@ -0,0 +1,367 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xcoff
+
+// File Header.
+type FileHeader32 struct {
+	Fmagic   uint16 // Target machine
+	Fnscns   uint16 // Number of sections
+	Ftimedat int32  // Time and date of file creation
+	Fsymptr  uint32 // Byte offset to symbol table start
+	Fnsyms   int32  // Number of entries in symbol table
+	Fopthdr  uint16 // Number of bytes in optional header
+	Fflags   uint16 // Flags
+}
+
+type FileHeader64 struct {
+	Fmagic   uint16 // Target machine
+	Fnscns   uint16 // Number of sections
+	Ftimedat int32  // Time and date of file creation
+	Fsymptr  uint64 // Byte offset to symbol table start
+	Fopthdr  uint16 // Number of bytes in optional header
+	Fflags   uint16 // Flags
+	Fnsyms   int32  // Number of entries in symbol table
+}
+
+const (
+	FILHSZ_32 = 20
+	FILHSZ_64 = 24
+)
+const (
+	U802TOCMAGIC = 0737 // AIX 32-bit XCOFF
+	U64_TOCMAGIC = 0767 // AIX 64-bit XCOFF
+)
+
+// Flags that describe the type of the object file.
+const (
+	F_RELFLG    = 0x0001
+	F_EXEC      = 0x0002
+	F_LNNO      = 0x0004
+	F_FDPR_PROF = 0x0010
+	F_FDPR_OPTI = 0x0020
+	F_DSA       = 0x0040
+	F_VARPG     = 0x0100
+	F_DYNLOAD   = 0x1000
+	F_SHROBJ    = 0x2000
+	F_LOADONLY  = 0x4000
+)
+
+// Section Header.
+type SectionHeader32 struct {
+	Sname    [8]byte // Section name
+	Spaddr   uint32  // Physical address
+	Svaddr   uint32  // Virtual address
+	Ssize    uint32  // Section size
+	Sscnptr  uint32  // Offset in file to raw data for section
+	Srelptr  uint32  // Offset in file to relocation entries for section
+	Slnnoptr uint32  // Offset in file to line number entries for section
+	Snreloc  uint16  // Number of relocation entries
+	Snlnno   uint16  // Number of line number entries
+	Sflags   uint32  // Flags to define the section type
+}
+
+type SectionHeader64 struct {
+	Sname    [8]byte // Section name
+	Spaddr   uint64  // Physical address
+	Svaddr   uint64  // Virtual address
+	Ssize    uint64  // Section size
+	Sscnptr  uint64  // Offset in file to raw data for section
+	Srelptr  uint64  // Offset in file to relocation entries for section
+	Slnnoptr uint64  // Offset in file to line number entries for section
+	Snreloc  uint32  // Number of relocation entries
+	Snlnno   uint32  // Number of line number entries
+	Sflags   uint32  // Flags to define the section type
+	Spad     uint32  // Needs to be 72 bytes long
+}
+
+// Flags defining the section type.
+const (
+	STYP_DWARF  = 0x0010
+	STYP_TEXT   = 0x0020
+	STYP_DATA   = 0x0040
+	STYP_BSS    = 0x0080
+	STYP_EXCEPT = 0x0100
+	STYP_INFO   = 0x0200
+	STYP_TDATA  = 0x0400
+	STYP_TBSS   = 0x0800
+	STYP_LOADER = 0x1000
+	STYP_DEBUG  = 0x2000
+	STYP_TYPCHK = 0x4000
+	STYP_OVRFLO = 0x8000
+)
+const (
+	SSUBTYP_DWINFO  = 0x10000 // DWARF info section
+	SSUBTYP_DWLINE  = 0x20000 // DWARF line-number section
+	SSUBTYP_DWPBNMS = 0x30000 // DWARF public names section
+	SSUBTYP_DWPBTYP = 0x40000 // DWARF public types section
+	SSUBTYP_DWARNGE = 0x50000 // DWARF aranges section
+	SSUBTYP_DWABREV = 0x60000 // DWARF abbreviation section
+	SSUBTYP_DWSTR   = 0x70000 // DWARF strings section
+	SSUBTYP_DWRNGES = 0x80000 // DWARF ranges section
+	SSUBTYP_DWLOC   = 0x90000 // DWARF location lists section
+	SSUBTYP_DWFRAME = 0xA0000 // DWARF frames section
+	SSUBTYP_DWMAC   = 0xB0000 // DWARF macros section
+)
+
+// Symbol Table Entry.
+type SymEnt32 struct {
+	Nname   [8]byte // Symbol name
+	Nvalue  uint32  // Symbol value
+	Nscnum  int16   // Section number of symbol
+	Ntype   uint16  // Basic and derived type specification
+	Nsclass int8    // Storage class of symbol
+	Nnumaux int8    // Number of auxiliary entries
+}
+
+type SymEnt64 struct {
+	Nvalue  uint64 // Symbol value
+	Noffset uint32 // Offset of the name in string table or .debug section
+	Nscnum  int16  // Section number of symbol
+	Ntype   uint16 // Basic and derived type specification
+	Nsclass int8   // Storage class of symbol
+	Nnumaux int8   // Number of auxiliary entries
+}
+
+const SYMESZ = 18
+
+const (
+	// Special values of Nscnum (section number of a symbol).
+	N_DEBUG = -2
+	N_ABS   = -1
+	N_UNDEF = 0
+
+	// Flag bits of Ntype; SYM_V_* presumably encode symbol visibility (TODO confirm).
+	SYM_V_INTERNAL  = 0x1000
+	SYM_V_HIDDEN    = 0x2000
+	SYM_V_PROTECTED = 0x3000
+	SYM_V_EXPORTED  = 0x4000
+	SYM_TYPE_FUNC   = 0x0020 // is function
+)
+
+// Storage Class.
+const (
+	C_NULL    = 0   // Symbol table entry marked for deletion
+	C_EXT     = 2   // External symbol
+	C_STAT    = 3   // Static symbol
+	C_BLOCK   = 100 // Beginning or end of inner block
+	C_FCN     = 101 // Beginning or end of function
+	C_FILE    = 103 // Source file name and compiler information
+	C_HIDEXT  = 107 // Unnamed external symbol
+	C_BINCL   = 108 // Beginning of include file
+	C_EINCL   = 109 // End of include file
+	C_WEAKEXT = 111 // Weak external symbol
+	C_DWARF   = 112 // DWARF symbol
+	C_GSYM    = 128 // Global variable
+	C_LSYM    = 129 // Automatic variable allocated on stack
+	C_PSYM    = 130 // Argument to subroutine allocated on stack
+	C_RSYM    = 131 // Register variable
+	C_RPSYM   = 132 // Argument to function or procedure stored in register
+	C_STSYM   = 133 // Statically allocated symbol
+	C_BCOMM   = 135 // Beginning of common block
+	C_ECOML   = 136 // Local member of common block
+	C_ECOMM   = 137 // End of common block
+	C_DECL    = 140 // Declaration of object
+	C_ENTRY   = 141 // Alternate entry
+	C_FUN     = 142 // Function or procedure
+	C_BSTAT   = 143 // Beginning of static block
+	C_ESTAT   = 144 // End of static block
+	C_GTLS    = 145 // Global thread-local variable
+	C_STTLS   = 146 // Static thread-local variable
+)
+
+// File Auxiliary Entry
+type AuxFile64 struct {
+	Xfname   [8]byte // Name or offset inside string table
+	Xftype   uint8   // Source file string type
+	Xauxtype uint8   // Type of auxiliary entry
+}
+
+// Function Auxiliary Entry
+type AuxFcn32 struct {
+	Xexptr   uint32 // File offset to exception table entry
+	Xfsize   uint32 // Size of function in bytes
+	Xlnnoptr uint32 // File pointer to line number
+	Xendndx  uint32 // Symbol table index of next entry
+	Xpad     uint16 // Unused
+}
+type AuxFcn64 struct {
+	Xlnnoptr uint64 // File pointer to line number
+	Xfsize   uint32 // Size of function in bytes
+	Xendndx  uint32 // Symbol table index of next entry
+	Xpad     uint8  // Unused
+	Xauxtype uint8  // Type of auxiliary entry
+}
+
+type AuxSect64 struct {
+	Xscnlen  uint64 // section length
+	Xnreloc  uint64 // Num RLDs
+	pad      uint8
+	Xauxtype uint8 // Type of auxiliary entry
+}
+
+// csect Auxiliary Entry.
+type AuxCSect32 struct {
+	Xscnlen   int32  // Length or symbol table index
+	Xparmhash uint32 // Offset of parameter type-check string
+	Xsnhash   uint16 // .typchk section number
+	Xsmtyp    uint8  // Symbol alignment and type
+	Xsmclas   uint8  // Storage-mapping class
+	Xstab     uint32 // Reserved
+	Xsnstab   uint16 // Reserved
+}
+
+type AuxCSect64 struct {
+	Xscnlenlo uint32 // Lower 4 bytes of length or symbol table index
+	Xparmhash uint32 // Offset of parameter type-check string
+	Xsnhash   uint16 // .typchk section number
+	Xsmtyp    uint8  // Symbol alignment and type
+	Xsmclas   uint8  // Storage-mapping class
+	Xscnlenhi int32  // Upper 4 bytes of length or symbol table index
+	Xpad      uint8  // Unused
+	Xauxtype  uint8  // Type of auxiliary entry
+}
+
+// Auxiliary type
+const (
+	_AUX_EXCEPT = 255
+	_AUX_FCN    = 254
+	_AUX_SYM    = 253
+	_AUX_FILE   = 252
+	_AUX_CSECT  = 251
+	_AUX_SECT   = 250
+)
+
+// Symbol type field.
+const (
+	XTY_ER = 0 // External reference
+	XTY_SD = 1 // Section definition
+	XTY_LD = 2 // Label definition
+	XTY_CM = 3 // Common csect definition
+)
+
+// Defines for File auxiliary definitions: x_ftype field of x_file
+const (
+	XFT_FN = 0   // Source File Name
+	XFT_CT = 1   // Compile Time Stamp
+	XFT_CV = 2   // Compiler Version Number
+	XFT_CD = 128 // Compiler Defined Information
+)
+
+// Storage-mapping class values (XMC_*); presumably what the Xsmclas "Storage-mapping class" csect fields hold (TODO confirm).
+const (
+	XMC_PR     = 0  // Program code
+	XMC_RO     = 1  // Read-only constant
+	XMC_DB     = 2  // Debug dictionary table
+	XMC_TC     = 3  // TOC entry
+	XMC_UA     = 4  // Unclassified
+	XMC_RW     = 5  // Read/Write data
+	XMC_GL     = 6  // Global linkage
+	XMC_XO     = 7  // Extended operation
+	XMC_SV     = 8  // 32-bit supervisor call descriptor
+	XMC_BS     = 9  // BSS class
+	XMC_DS     = 10 // Function descriptor
+	XMC_UC     = 11 // Unnamed FORTRAN common
+	XMC_TC0    = 15 // TOC anchor
+	XMC_TD     = 16 // Scalar data entry in the TOC
+	XMC_SV64   = 17 // 64-bit supervisor call descriptor
+	XMC_SV3264 = 18 // Supervisor call descriptor for both 32-bit and 64-bit
+	XMC_TL     = 20 // Read/Write thread-local data (presumably the initialized counterpart of XMC_UL — TODO confirm)
+	XMC_UL     = 21 // Read/Write thread-local data (.tbss)
+	XMC_TE     = 22 // TOC entry. NOTE(review): same wording as XMC_TC; the XCOFF spec reportedly describes TE as mapped at the end of the TOC — confirm
+)
+
+// LoaderHeader32 is the loader section header with 32-bit offsets; its eight 4-byte fields total 32 bytes (cf. LDHDRSZ_32).
+type LoaderHeader32 struct {
+	Lversion int32  // Loader section version number
+	Lnsyms   int32  // Number of symbol table entries
+	Lnreloc  int32  // Number of relocation table entries
+	Listlen  uint32 // Length of import file ID string table
+	Lnimpid  int32  // Number of import file IDs
+	Limpoff  uint32 // Offset to start of import file IDs
+	Lstlen   uint32 // Length of string table
+	Lstoff   uint32 // Offset to start of string table
+}
+
+type LoaderHeader64 struct {
+	Lversion int32  // Loader section version number
+	Lnsyms   int32  // Number of symbol table entries
+	Lnreloc  int32  // Number of relocation table entries
+	Listlen  uint32 // Length of import file ID string table
+	Lnimpid  int32  // Number of import file IDs
+	Lstlen   uint32 // Length of string table (placed before Limpoff here, unlike in LoaderHeader32)
+	Limpoff  uint64 // Offset to start of import file IDs (64-bit)
+	Lstoff   uint64 // Offset to start of string table (64-bit)
+	Lsymoff  uint64 // Offset to start of symbol table (no counterpart in LoaderHeader32)
+	Lrldoff  uint64 // Offset to start of relocation entries (no counterpart in LoaderHeader32)
+}
+
+const (
+	LDHDRSZ_32 = 32 // Equals the summed field sizes of LoaderHeader32 (8 x 4 bytes)
+	LDHDRSZ_64 = 56 // Equals the summed field sizes of LoaderHeader64 (6 x 4 + 4 x 8 bytes)
+)
+
+// LoaderSymbol32 is a loader symbol entry with a 32-bit address and an inline 8-byte name field.
+type LoaderSymbol32 struct {
+	Lname   [8]byte // Symbol name or byte offset into string table
+	Lvalue  uint32  // Address field
+	Lscnum  int16   // Section number containing symbol
+	Lsmtype int8    // Symbol type, export, import flags
+	Lsmclas int8    // Symbol storage class
+	Lifile  int32   // Import file ID; ordinal of import file IDs
+	Lparm   uint32  // Parameter type-check field
+}
+
+type LoaderSymbol64 struct {
+	Lvalue  uint64 // Address field (64-bit)
+	Loffset uint32 // Byte offset into string table of symbol name (there is no inline Lname as in LoaderSymbol32)
+	Lscnum  int16  // Section number containing symbol
+	Lsmtype int8   // Symbol type, export, import flags
+	Lsmclas int8   // Symbol storage class
+	Lifile  int32  // Import file ID; ordinal of import file IDs
+	Lparm   uint32 // Parameter type-check field
+}
+
+type Reloc32 struct {
+	Rvaddr  uint32 // (virtual) address of reference, 32-bit
+	Rsymndx uint32 // Index into symbol table
+	Rsize   uint8  // Sign and relocation bit length
+	Rtype   uint8  // Relocation type (presumably one of the R_* constants — TODO confirm)
+}
+
+type Reloc64 struct {
+	Rvaddr  uint64 // (virtual) address of reference, 64-bit
+	Rsymndx uint32 // Index into symbol table
+	Rsize   uint8  // Sign and relocation bit length
+	Rtype   uint8  // Relocation type (presumably one of the R_* constants — TODO confirm)
+}
+
+const (
+	R_POS = 0x00 // A(sym) Positive relocation
+	R_NEG = 0x01 // -A(sym) Negative relocation
+	R_REL = 0x02 // A(sym-*) Relative to self
+	R_TOC = 0x03 // A(sym-TOC) Relative to TOC
+	R_TRL = 0x12 // A(sym-TOC) TOC-relative indirect load
+
+	R_TRLA = 0x13 // A(sym-TOC) TOC-relative load address; modifiable instruction
+	R_GL   = 0x05 // A(external TOC of sym) Global linkage
+	R_TCL  = 0x06 // A(local TOC of sym) Local object TOC address
+	R_RL   = 0x0C // A(sym) Positive indirect load; modifiable instruction
+	R_RLA  = 0x0D // A(sym) Positive load address; modifiable instruction
+	R_REF  = 0x0F // AL0(sym) Non-relocating reference; no garbage collect
+	R_BA   = 0x08 // A(sym) Branch absolute; cannot modify instruction
+	R_RBA  = 0x18 // A(sym) Branch absolute; modifiable instruction
+	R_BR   = 0x0A // A(sym-*) Branch relative to self; non-modifiable instruction
+	R_RBR  = 0x1A // A(sym-*) Branch relative to self; modifiable instruction
+
+	R_TLS    = 0x20 // General-dynamic reference to TLS symbol
+	R_TLS_IE = 0x21 // Initial-exec reference to TLS symbol
+	R_TLS_LD = 0x22 // Local-dynamic reference to TLS symbol
+	R_TLS_LE = 0x23 // Local-exec reference to TLS symbol
+	R_TLSM   = 0x24 // Module reference to TLS symbol
+	R_TLSML  = 0x25 // Module reference to local (own) module
+
+	R_TOCU = 0x30 // Relative to TOC - high order bits
+	R_TOCL = 0x31 // Relative to TOC - low order bits
+)
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 13:15:26 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 13:15:26 +0000
commit	82539ad8d59729fb45b0bb0edda8f2bddb719eb1 (patch)
tree	58f0b58e6f44f0e04d4a6373132cf426fa835fa7 /src/internal
parent	Initial commit. (diff)
download	golang-1.17-82539ad8d59729fb45b0bb0edda8f2bddb719eb1.tar.xz golang-1.17-82539ad8d59729fb45b0bb0edda8f2bddb719eb1.zip