44 files changed, 8011 insertions, 0 deletions
diff --git a/test/codegen/README b/test/codegen/README
new file mode 100644
index 0000000..b803fe5
--- /dev/null
+++ b/test/codegen/README
@@ -0,0 +1,153 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+The codegen directory contains code generation tests for the gc
+compiler.
+
+
+- Introduction
+
+The test harness compiles Go code inside files in this directory and
+matches the generated assembly (the output of `go tool compile -S`)
+against a set of regexps to be specified in comments that follow a
+special syntax (described below). The test driver is implemented as a
+step of the top-level test/run.go suite, called "asmcheck".
+
+The codegen harness is part of the all.bash test suite, but for
+performance reasons only the codegen tests for the host machine's
+GOARCH are enabled by default, and only on GOOS=linux.
+
+To perform comprehensive tests for all the supported architectures
+(even on a non-Linux system), one can run the following command
+
+  $ ../bin/go run run.go -all_codegen -v codegen
+
+in the top-level test directory. This is recommended after any change
+that affects the compiler's code.
+
+The test harness compiles the tests with the same go toolchain that is
+used to run run.go. After writing tests for a newly added codegen
+transformation, it can be useful to first run the test harness with a
+toolchain from a released Go version (and verify that the new tests
+fail), and then re-run the tests using the devel toolchain.
+
+
+- Regexps comments syntax
+
+Instructions to match are specified inside plain comments that start
+with an architecture tag, followed by a colon and a quoted Go-style
+regexp to be matched. For example, the following test:
+
+  func Sqrt(x float64) float64 {
+  	   // amd64:"SQRTSD"
+  	   // arm64:"FSQRTD"
+  	   return math.Sqrt(x)
+  }
+
+verifies that math.Sqrt calls are intrinsified to a SQRTSD instruction
+on amd64, and to a FSQRTD instruction on arm64.
+
+It is possible to put multiple architecture checks into the same
+line, as:
+
+  // amd64:"SQRTSD" arm64:"FSQRTD"
+
+although this form should be avoided when doing so would make the
+regexps line excessively long and difficult to read.
+
+Comments that are on their own line will be matched against the first
+subsequent non-comment line. Inline comments are also supported; the
+regexp will be matched against the code found on the same line:
+
+  func Sqrt(x float64) float64 {
+  	   return math.Sqrt(x) // arm:"SQRTD"
+  }
+
+It's possible to specify a comma-separated list of regexps to be
+matched. For example, the following test:
+
+  func TZ8(n uint8) int {
+  	   // amd64:"BSFQ","ORQ\t\\$256"
+  	   return bits.TrailingZeros8(n)
+  }
+
+verifies that the code generated for a bits.TrailingZeros8 call on
+amd64 contains both a "BSFQ" instruction and an "ORQ $256".
+
+Note how the ORQ regex includes a tab char (\t). In the Go assembly
+syntax, operands are separated from opcodes by a tabulation.
+
+Regexps can be quoted using either " or `. Special characters must be
+escaped accordingly. Both of these are accepted, and equivalent:
+
+  // amd64:"ADDQ\t\\$3"
+  // amd64:`ADDQ\t\$3`
+
+and they'll match this assembly line:
+
+  ADDQ	$3
+
+Negative matches can be specified using a - before the quoted regexp.
+For example:
+
+  func MoveSmall() {
+  	   x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+  	   copy(x[1:], x[:]) // arm64:-".*memmove"
+  }
+
+verifies that NO memmove call is present in the assembly generated for
+the copy() line.
+
+
+- Architecture specifiers
+
+There are three different ways to specify on which architecture a test
+should be run:
+
+* Specify only the architecture (eg: "amd64"). This indicates that the
+  check should be run on all the supported architecture variants. For
+  instance, arm checks will be run against all supported GOARM
+  variations (5,6,7).
+* Specify both the architecture and a variant, separated by a slash
+  (eg: "arm/7"). This means that the check will be run only on that
+  specific variant.
+* Specify the operating system, the architecture and the variant,
+  separated by slashes (eg: "plan9/386/sse2", "plan9/amd64/"). This is
+  needed in the rare case that you need to do a codegen test affected
+  by a specific operating system; by default, tests are compiled only
+  targeting linux.
+
+
+- Remarks, and Caveats
+
+-- Write small test functions
+
+As a general guideline, test functions should be small, to avoid
+possible interactions between unrelated lines of code that may be
+introduced, for example, by the compiler's optimization passes.
+
+Any given line of Go code could get assigned more instructions than it
+may appear from reading the source. In particular, matching all MOV
+instructions should be avoided; the compiler may add them for
+unrelated reasons and this may render the test ineffective.
+
+-- Line matching logic
+
+Regexps are always matched from the start of the instructions line.
+This means, for example, that the "MULQ" regexp is equivalent to
+"^MULQ" (^ representing the start of the line), and it will NOT match
+the following assembly line:
+
+  IMULQ	$99, AX
+
+To force a match at any point of the line, ".*MULQ" should be used.
+
+For the same reason, a negative regexp like -"memmove" is not enough
+to make sure that no memmove call is included in the assembly. A
+memmove call looks like this:
+
+  CALL	runtime.memmove(SB)
+
+To make sure that the "memmove" symbol does not appear anywhere in the
+assembly, the negative regexp to be used is -".*memmove".
diff --git a/test/codegen/addrcalc.go b/test/codegen/addrcalc.go
new file mode 100644
index 0000000..45552d2
--- /dev/null
+++ b/test/codegen/addrcalc.go
@@ -0,0 +1,14 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Make sure we use ADDQ instead of LEAQ when we can.
+
+func f(p *[4][2]int, x int) *int {
+	// amd64:"ADDQ",-"LEAQ"
+	return &p[x][0]
+}
diff --git a/test/codegen/alloc.go b/test/codegen/alloc.go
new file mode 100644
index 0000000..31455fd
--- /dev/null
+++ b/test/codegen/alloc.go
@@ -0,0 +1,34 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check that allocating a 0-size object does not
+// introduce a call to runtime.newobject.
+
+package codegen
+
+func zeroAllocNew1() *struct{} {
+	// 386:-`CALL\truntime\.newobject`
+	// amd64:-`CALL\truntime\.newobject`
+	// arm:-`CALL\truntime\.newobject`
+	// arm64:-`CALL\truntime\.newobject`
+	return new(struct{})
+}
+
+func zeroAllocNew2() *[0]int {
+	// 386:-`CALL\truntime\.newobject`
+	// amd64:-`CALL\truntime\.newobject`
+	// arm:-`CALL\truntime\.newobject`
+	// arm64:-`CALL\truntime\.newobject`
+	return new([0]int)
+}
+
+func zeroAllocSliceLit() []int {
+	// 386:-`CALL\truntime\.newobject`
+	// amd64:-`CALL\truntime\.newobject`
+	// arm:-`CALL\truntime\.newobject`
+	// arm64:-`CALL\truntime\.newobject`
+	return []int{}
+}
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
new file mode 100644
index 0000000..a27a17f
--- /dev/null
+++ b/test/codegen/arithmetic.go
@@ -0,0 +1,553 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to arithmetic
+// simplifications and optimizations on integer types.
+// For codegen tests on float types, see floats.go.
+
+// ----------------- //
+//    Subtraction    //
+// ----------------- //
+
+var ef int
+
+func SubMem(arr []int, b, c, d int) int {
+	// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
+	// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
+	arr[2] -= b
+	// 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
+	// amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
+	arr[3] -= b
+	// 386:`DECL\s16\([A-Z]+\)`
+	arr[4]--
+	// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
+	arr[5] -= 20
+	// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
+	ef -= arr[b]
+	// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
+	arr[c] -= b
+	// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
+	arr[d] -= 15
+	// 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
+	arr[b]--
+	// amd64:`DECQ\s64\([A-Z]+\)`
+	arr[8]--
+	// 386:"SUBL\t4"
+	// amd64:"SUBQ\t8"
+	return arr[0] - arr[1]
+}
+
+func SubFromConst(a int) int {
+	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	b := 40 - a
+	return b
+}
+
+func SubFromConstNeg(a int) int {
+	// ppc64le: `ADD\t[$]40,\sR[0-9]+,\sR`
+	// ppc64: `ADD\t[$]40,\sR[0-9]+,\sR`
+	c := 40 - (-a)
+	return c
+}
+
+func SubSubFromConst(a int) int {
+	// ppc64le: `ADD\t[$]20,\sR[0-9]+,\sR`
+	// ppc64: `ADD\t[$]20,\sR[0-9]+,\sR`
+	c := 40 - (20 - a)
+	return c
+}
+
+func AddSubFromConst(a int) int {
+	// ppc64le: `SUBC\tR[0-9]+,\s[$]60,\sR`
+	// ppc64: `SUBC\tR[0-9]+,\s[$]60,\sR`
+	c := 40 + (20 - a)
+	return c
+}
+
+func NegSubFromConst(a int) int {
+	// ppc64le: `ADD\t[$]-20,\sR[0-9]+,\sR`
+	// ppc64: `ADD\t[$]-20,\sR[0-9]+,\sR`
+	c := -(20 - a)
+	return c
+}
+
+func NegAddFromConstNeg(a int) int {
+	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	c := -(-40 + a)
+	return c
+}
+
+// -------------------- //
+//    Multiplication    //
+// -------------------- //
+
+func Pow2Muls(n1, n2 int) (int, int) {
+	// amd64:"SHLQ\t[$]5",-"IMULQ"
+	// 386:"SHLL\t[$]5",-"IMULL"
+	// arm:"SLL\t[$]5",-"MUL"
+	// arm64:"LSL\t[$]5",-"MUL"
+	// ppc64:"SLD\t[$]5",-"MUL"
+	// ppc64le:"SLD\t[$]5",-"MUL"
+	a := n1 * 32
+
+	// amd64:"SHLQ\t[$]6",-"IMULQ"
+	// 386:"SHLL\t[$]6",-"IMULL"
+	// arm:"SLL\t[$]6",-"MUL"
+	// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
+	// ppc64:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
+	// ppc64le:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
+	b := -64 * n2
+
+	return a, b
+}
+
+func Mul_96(n int) int {
+	// amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
+	// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
+	// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+	// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+	// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
+	return n * 96
+}
+
+func Mul_n120(n int) int {
+	// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
+	return n * -120
+}
+
+func MulMemSrc(a []uint32, b []float32) {
+	// 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
+	a[0] *= a[1]
+	// 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
+	// amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
+	b[0] *= b[1]
+}
+
+// Multiplications merging tests
+
+func MergeMuls1(n int) int {
+	// amd64:"IMUL3Q\t[$]46"
+	// 386:"IMUL3L\t[$]46"
+	return 15*n + 31*n // 46n
+}
+
+func MergeMuls2(n int) int {
+	// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
+	// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
+	return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
+}
+
+func MergeMuls3(a, n int) int {
+	// amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
+	// 386:"ADDL\t[$]19",-"IMULL\t[$]19"
+	return a*n + 19*n // (a+19)n
+}
+
+func MergeMuls4(n int) int {
+	// amd64:"IMUL3Q\t[$]14"
+	// 386:"IMUL3L\t[$]14"
+	return 23*n - 9*n // 14n
+}
+
+func MergeMuls5(a, n int) int {
+	// amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
+	// 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
+	return a*n - 19*n // (a-19)n
+}
+
+// -------------- //
+//    Division    //
+// -------------- //
+
+func DivMemSrc(a []float64) {
+	// 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
+	// amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
+	a[0] /= a[1]
+}
+
+func Pow2Divs(n1 uint, n2 int) (uint, int) {
+	// 386:"SHRL\t[$]5",-"DIVL"
+	// amd64:"SHRQ\t[$]5",-"DIVQ"
+	// arm:"SRL\t[$]5",-".*udiv"
+	// arm64:"LSR\t[$]5",-"UDIV"
+	// ppc64:"SRD"
+	// ppc64le:"SRD"
+	a := n1 / 32 // unsigned
+
+	// amd64:"SARQ\t[$]6",-"IDIVQ"
+	// 386:"SARL\t[$]6",-"IDIVL"
+	// arm:"SRA\t[$]6",-".*udiv"
+	// arm64:"ASR\t[$]6",-"SDIV"
+	// ppc64:"SRAD"
+	// ppc64le:"SRAD"
+	b := n2 / 64 // signed
+
+	return a, b
+}
+
+// Check that constant divisions get turned into MULs
+func ConstDivs(n1 uint, n2 int) (uint, int) {
+	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
+	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
+	// arm64:`MOVD`,`UMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	a := n1 / 17 // unsigned
+
+	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
+	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
+	// arm64:`MOVD`,`SMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	b := n2 / 17 // signed
+
+	return a, b
+}
+
+func FloatDivs(a []float32) float32 {
+	// amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
+	// 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
+	return a[1] / a[2]
+}
+
+func Pow2Mods(n1 uint, n2 int) (uint, int) {
+	// 386:"ANDL\t[$]31",-"DIVL"
+	// amd64:"ANDQ\t[$]31",-"DIVQ"
+	// arm:"AND\t[$]31",-".*udiv"
+	// arm64:"AND\t[$]31",-"UDIV"
+	// ppc64:"ANDCC\t[$]31"
+	// ppc64le:"ANDCC\t[$]31"
+	a := n1 % 32 // unsigned
+
+	// 386:"SHRL",-"IDIVL"
+	// amd64:"SHRQ",-"IDIVQ"
+	// arm:"SRA",-".*udiv"
+	// arm64:"ASR",-"REM"
+	// ppc64:"SRAD"
+	// ppc64le:"SRAD"
+	b := n2 % 64 // signed
+
+	return a, b
+}
+
+// Check that signed divisibility checks get converted to AND on low bits
+func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
+	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
+	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
+	// arm:"AND\t[$]63",-".*udiv",-"SRA"
+	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
+	// ppc64:"ANDCC\t[$]63",-"SRAD"
+	// ppc64le:"ANDCC\t[$]63",-"SRAD"
+	a := n1%64 == 0 // signed divisible
+
+	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
+	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
+	// arm:"AND\t[$]63",-".*udiv",-"SRA"
+	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
+	// ppc64:"ANDCC\t[$]63",-"SRAD"
+	// ppc64le:"ANDCC\t[$]63",-"SRAD"
+	b := n2%64 != 0 // signed indivisible
+
+	return a, b
+}
+
+// Check that constant modulo divs get turned into MULs
+func ConstMods(n1 uint, n2 int) (uint, int) {
+	// amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
+	// 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
+	// arm64:`MOVD`,`UMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	a := n1 % 17 // unsigned
+
+	// amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
+	// 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
+	// arm64:`MOVD`,`SMULH`,-`DIV`
+	// arm:`MOVW`,`MUL`,-`.*udiv`
+	b := n2 % 17 // signed
+
+	return a, b
+}
+
+// Check that divisibility checks x%c==0 are converted to MULs and rotates
+func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
+	// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
+	// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVL"
+	// arm64:"MOVD\t[$]-6148914691236517205","MUL","ROR",-"DIV"
+	// arm:"MUL","CMP\t[$]715827882",-".*udiv"
+	// ppc64:"MULLD","ROTL\t[$]63"
+	// ppc64le:"MULLD","ROTL\t[$]63"
+	evenU := n1%6 == 0
+
+	// amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
+	// 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVL"
+	// arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
+	// arm:"MUL","CMP\t[$]226050910",-".*udiv"
+	// ppc64:"MULLD",-"ROTL"
+	// ppc64le:"MULLD",-"ROTL"
+	oddU := n1%19 == 0
+
+	// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
+	// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"IDIVL"
+	// arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
+	// arm:"MUL","ADD\t[$]715827882",-".*udiv"
+	// ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
+	// ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
+	// ppc64/power9:"MADDLD","ROTL\t[$]63"
+	// ppc64le/power9:"MADDLD","ROTL\t[$]63"
+	evenS := n2%6 == 0
+
+	// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
+	// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"IDIVL"
+	// arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
+	// arm:"MUL","ADD\t[$]113025455",-".*udiv"
+	// ppc64/power8:"MULLD","ADD",-"ROTL"
+	// ppc64/power9:"MADDLD",-"ROTL"
+	// ppc64le/power8:"MULLD","ADD",-"ROTL"
+	// ppc64le/power9:"MADDLD",-"ROTL"
+	oddS := n2%19 == 0
+
+	return evenU, oddU, evenS, oddS
+}
+
+// Check that fix-up code is not generated for divisions where it has been
+// proven that the divisor is not -1 or that the dividend is > MinIntNN.
+func NoFix64A(divr int64) (int64, int64) {
+	var d int64 = 42
+	var e int64 = 84
+	if divr > 5 {
+		d /= divr // amd64:-"JMP"
+		e %= divr // amd64:-"JMP"
+		// The following statement is to avoid conflict between the above check
+		// and the normal JMP generated at the end of the block.
+		d += e
+	}
+	return d, e
+}
+
+func NoFix64B(divd int64) (int64, int64) {
+	var d int64
+	var e int64
+	var divr int64 = -1
+	if divd > -9223372036854775808 {
+		d = divd / divr // amd64:-"JMP"
+		e = divd % divr // amd64:-"JMP"
+		d += e
+	}
+	return d, e
+}
+
+func NoFix32A(divr int32) (int32, int32) {
+	var d int32 = 42
+	var e int32 = 84
+	if divr > 5 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d /= divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e %= divr
+		d += e
+	}
+	return d, e
+}
+
+func NoFix32B(divd int32) (int32, int32) {
+	var d int32
+	var e int32
+	var divr int32 = -1
+	if divd > -2147483648 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d = divd / divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e = divd % divr
+		d += e
+	}
+	return d, e
+}
+
+func NoFix16A(divr int16) (int16, int16) {
+	var d int16 = 42
+	var e int16 = 84
+	if divr > 5 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d /= divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e %= divr
+		d += e
+	}
+	return d, e
+}
+
+func NoFix16B(divd int16) (int16, int16) {
+	var d int16
+	var e int16
+	var divr int16 = -1
+	if divd > -32768 {
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		d = divd / divr
+		// amd64:-"JMP"
+		// 386:-"JMP"
+		e = divd % divr
+		d += e
+	}
+	return d, e
+}
+
+// Check that len() and cap() calls divided by powers of two are
+// optimized into shifts and ands
+
+func LenDiv1(a []int) int {
+	// 386:"SHRL\t[$]10"
+	// amd64:"SHRQ\t[$]10"
+	// arm64:"LSR\t[$]10",-"SDIV"
+	// arm:"SRL\t[$]10",-".*udiv"
+	// ppc64:"SRD\t[$]10"
+	// ppc64le:"SRD\t[$]10"
+	return len(a) / 1024
+}
+
+func LenDiv2(s string) int {
+	// 386:"SHRL\t[$]11"
+	// amd64:"SHRQ\t[$]11"
+	// arm64:"LSR\t[$]11",-"SDIV"
+	// arm:"SRL\t[$]11",-".*udiv"
+	// ppc64:"SRD\t[$]11"
+	// ppc64le:"SRD\t[$]11"
+	return len(s) / (4097 >> 1)
+}
+
+func LenMod1(a []int) int {
+	// 386:"ANDL\t[$]1023"
+	// amd64:"ANDQ\t[$]1023"
+	// arm64:"AND\t[$]1023",-"SDIV"
+	// arm/6:"AND",-".*udiv"
+	// arm/7:"BFC",-".*udiv",-"AND"
+	// ppc64:"ANDCC\t[$]1023"
+	// ppc64le:"ANDCC\t[$]1023"
+	return len(a) % 1024
+}
+
+func LenMod2(s string) int {
+	// 386:"ANDL\t[$]2047"
+	// amd64:"ANDQ\t[$]2047"
+	// arm64:"AND\t[$]2047",-"SDIV"
+	// arm/6:"AND",-".*udiv"
+	// arm/7:"BFC",-".*udiv",-"AND"
+	// ppc64:"ANDCC\t[$]2047"
+	// ppc64le:"ANDCC\t[$]2047"
+	return len(s) % (4097 >> 1)
+}
+
+func CapDiv(a []int) int {
+	// 386:"SHRL\t[$]12"
+	// amd64:"SHRQ\t[$]12"
+	// arm64:"LSR\t[$]12",-"SDIV"
+	// arm:"SRL\t[$]12",-".*udiv"
+	// ppc64:"SRD\t[$]12"
+	// ppc64le:"SRD\t[$]12"
+	return cap(a) / ((1 << 11) + 2048)
+}
+
+func CapMod(a []int) int {
+	// 386:"ANDL\t[$]4095"
+	// amd64:"ANDQ\t[$]4095"
+	// arm64:"AND\t[$]4095",-"SDIV"
+	// arm/6:"AND",-".*udiv"
+	// arm/7:"BFC",-".*udiv",-"AND"
+	// ppc64:"ANDCC\t[$]4095"
+	// ppc64le:"ANDCC\t[$]4095"
+	return cap(a) % ((1 << 11) + 2048)
+}
+
+func AddMul(x int) int {
+	// amd64:"LEAQ\t1"
+	return 2*x + 1
+}
+
+func MULA(a, b, c uint32) (uint32, uint32, uint32) {
+	// arm:`MULA`,-`MUL\s`
+	// arm64:`MADDW`,-`MULW`
+	r0 := a*b + c
+	// arm:`MULA`,-`MUL\s`
+	// arm64:`MADDW`,-`MULW`
+	r1 := c*79 + a
+	// arm:`ADD`,-`MULA`,-`MUL\s`
+	// arm64:`ADD`,-`MADD`,-`MULW`
+	r2 := b*64 + c
+	return r0, r1, r2
+}
+
+func MULS(a, b, c uint32) (uint32, uint32, uint32) {
+	// arm/7:`MULS`,-`MUL\s`
+	// arm/6:`SUB`,`MUL\s`,-`MULS`
+	// arm64:`MSUBW`,-`MULW`
+	r0 := c - a*b
+	// arm/7:`MULS`,-`MUL\s`
+	// arm/6:`SUB`,`MUL\s`,-`MULS`
+	// arm64:`MSUBW`,-`MULW`
+	r1 := a - c*79
+	// arm/7:`SUB`,-`MULS`,-`MUL\s`
+	// arm64:`SUB`,-`MSUBW`,-`MULW`
+	r2 := c - b*64
+	return r0, r1, r2
+}
+
+func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
+	// amd64:`INCL`
+	a++
+	// amd64:`DECL`
+	b--
+	// amd64:`SUBL.*-128`
+	c += 128
+	return a, b, c
+}
+
+// Divide -> shift rules usually require fixup for negative inputs.
+// If the input is non-negative, make sure the fixup is eliminated.
+func divInt(v int64) int64 {
+	if v < 0 {
+		return 0
+	}
+	// amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
+	return v / 512
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(z + C) -x -> C + (z - x)" can optimize the following cases.
+func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
+	// arm64:"SUB","ADD\t[$]2"
+	r0 := (i0 + 3) - (j0 + 1)
+	// arm64:"SUB","SUB\t[$]4"
+	r1 := (i1 - 3) - (j1 + 1)
+	// arm64:"SUB","ADD\t[$]4"
+	r2 := (i2 + 3) - (j2 - 1)
+	// arm64:"SUB","SUB\t[$]2"
+	r3 := (i3 - 3) - (j3 - 1)
+	return r0, r1, r2, r3
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(C - z) - x -> C - (z + x)" can optimize the following cases.
+func constantFold2(i0, j0, i1, j1 int) (int, int) {
+	// arm64:"ADD","MOVD\t[$]2","SUB"
+	r0 := (3 - i0) - (j0 + 1)
+	// arm64:"ADD","MOVD\t[$]4","SUB"
+	r1 := (3 - i1) - (j1 - 1)
+	return r0, r1
+}
+
+func constantFold3(i, j int) int {
+	// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
+	r := (5 * i) * (6 * j)
+	return r
+}
diff --git a/test/codegen/bitfield.go b/test/codegen/bitfield.go
new file mode 100644
index 0000000..0fe6799
--- /dev/null
+++ b/test/codegen/bitfield.go
@@ -0,0 +1,277 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to bit field
+// insertion/extraction simplifications/optimizations.
+
+func extr1(x, x2 uint64) uint64 {
+	return x<<7 + x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr2(x, x2 uint64) uint64 {
+	return x<<7 | x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr3(x, x2 uint64) uint64 {
+	return x<<7 ^ x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr4(x, x2 uint32) uint32 {
+	return x<<7 + x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+func extr5(x, x2 uint32) uint32 {
+	return x<<7 | x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+func extr6(x, x2 uint32) uint32 {
+	return x<<7 ^ x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+// check 32-bit shift masking
+func mask32(x uint32) uint32 {
+	return (x << 29) >> 29 // arm64:"AND\t[$]7, R[0-9]+",-"LSR",-"LSL"
+}
+
+// check 16-bit shift masking
+func mask16(x uint16) uint16 {
+	return (x << 14) >> 14 // arm64:"AND\t[$]3, R[0-9]+",-"LSR",-"LSL"
+}
+
+// check 8-bit shift masking
+func mask8(x uint8) uint8 {
+	return (x << 7) >> 7 // arm64:"AND\t[$]1, R[0-9]+",-"LSR",-"LSL"
+}
+
+func maskshift(x uint64) uint64 {
+	// arm64:"AND\t[$]4095, R[0-9]+",-"LSL",-"LSR",-"UBFIZ",-"UBFX"
+	return ((x << 5) & (0xfff << 5)) >> 5
+}
+
+// bitfield ops
+// bfi
+func bfi1(x, y uint64) uint64 {
+	// arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f)
+}
+
+func bfi2(x, y uint64) uint64 {
+	// arm64:"BFI\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
+	return (x << 24 >> 12) | (y & 0xfff0000000000fff)
+}
+
+// bfxil
+func bfxil1(x, y uint64) uint64 {
+	// arm64:"BFXIL\t[$]5, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	return ((x >> 5) & 0xfff) | (y & 0xfffffffffffff000)
+}
+
+func bfxil2(x, y uint64) uint64 {
+	// arm64:"BFXIL\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
+	return (x << 12 >> 24) | (y & 0xffffff0000000000)
+}
+
+// sbfiz
+func sbfiz1(x int64) int64 {
+	// arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
+	return (x << 4) >> 3
+}
+
+func sbfiz2(x int32) int64 {
+	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]29",-"LSL"
+}
+
+func sbfiz3(x int16) int64 {
+	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]13",-"LSL"
+}
+
+func sbfiz4(x int8) int64 {
+	return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]5",-"LSL"
+}
+
+func sbfiz5(x int32) int32 {
+	// arm64:"SBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
+	return (x << 4) >> 3
+}
+
+// sbfx
+func sbfx1(x int64) int64 {
+	return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
+}
+
+func sbfx2(x int64) int64 {
+	return (x << 60) >> 60 // arm64:"SBFX\tZR, R[0-9]+, [$]4",-"LSL",-"ASR"
+}
+
+func sbfx3(x int32) int64 {
+	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]29",-"ASR"
+}
+
+func sbfx4(x int16) int64 {
+	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]13",-"ASR"
+}
+
+func sbfx5(x int8) int64 {
+	return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]5",-"ASR"
+}
+
+func sbfx6(x int32) int32 {
+	return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
+}
+
+// ubfiz
+func ubfiz1(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND"
+	// s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND"
+	return (x & 0xfff) << 3
+}
+
+func ubfiz2(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND"
+	// s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND"
+	return (x << 4) & 0xfff0
+}
+
+func ubfiz3(x uint32) uint64 {
+	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]32",-"LSL"
+}
+
+func ubfiz4(x uint16) uint64 {
+	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
+}
+
+func ubfiz5(x uint8) uint64 {
+	return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]8",-"LSL"
+}
+
+func ubfiz6(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD"
+	return (x << 4) >> 3
+}
+
+func ubfiz7(x uint32) uint32 {
+	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"LSR"
+	return (x << 4) >> 3
+}
+
+func ubfiz8(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND"
+	return ((x & 0xfffff) << 4) >> 3
+}
+
+func ubfiz9(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]5, R[0-9]+, [$]13",-"LSL",-"LSR",-"AND"
+	return ((x << 3) & 0xffff) << 2
+}
+
+func ubfiz10(x uint64) uint64 {
+	// arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND"
+	return ((x << 5) & (0xfff << 5)) << 2
+}
+
+// ubfx
+func ubfx1(x uint64) uint64 {
+	// arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND"
+	return (x >> 25) & 1023
+}
+
+func ubfx2(x uint64) uint64 {
+	// arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND"
+	return (x & 0x0ff0) >> 4
+}
+
+func ubfx3(x uint32) uint64 {
+	return uint64(x >> 15) // arm64:"UBFX\t[$]15, R[0-9]+, [$]17",-"LSR"
+}
+
+func ubfx4(x uint16) uint64 {
+	return uint64(x >> 9) // arm64:"UBFX\t[$]9, R[0-9]+, [$]7",-"LSR"
+}
+
+func ubfx5(x uint8) uint64 {
+	return uint64(x >> 3) // arm64:"UBFX\t[$]3, R[0-9]+, [$]5",-"LSR"
+}
+
+func ubfx6(x uint64) uint64 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD"
+	return (x << 1) >> 2
+}
+
+func ubfx7(x uint32) uint32 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
+	return (x << 1) >> 2
+}
+
+func ubfx8(x uint64) uint64 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND"
+	return ((x << 1) >> 2) & 0xfff
+}
+
+func ubfx9(x uint64) uint64 {
+	// arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND"
+	// s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND"
+	return ((x >> 3) & 0xfff) >> 1
+}
+
+func ubfx10(x uint64) uint64 {
+	// arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD"
+	return ((x >> 2) << 5) >> 8
+}
+
+func ubfx11(x uint64) uint64 {
+	// arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR"
+	// s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND"
+	return ((x & 0xfffff) << 3) >> 4
+}
+
+// Check that we don't emit comparisons for constant shifts.
+//go:nosplit
+func shift_no_cmp(x int) int {
+	// arm64:`LSL\t[$]17`,-`CMP`
+	// mips64:`SLLV\t[$]17`,-`SGT`
+	return x << 17
+}
+
+func rev16(c uint64) (uint64, uint64, uint64) {
+	// arm64:`REV16`,-`AND`,-`LSR`,-`AND`,-`ORR\tR[0-9]+<<8`
+	b1 := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
+	// arm64:-`ADD\tR[0-9]+<<8`
+	b2 := ((c & 0xff00ff00ff00ff00) >> 8) + ((c & 0x00ff00ff00ff00ff) << 8)
+	// arm64:-`EOR\tR[0-9]+<<8`
+	b3 := ((c & 0xff00ff00ff00ff00) >> 8) ^ ((c & 0x00ff00ff00ff00ff) << 8)
+	return b1, b2, b3
+}
+
+func rev16w(c uint32) (uint32, uint32, uint32) {
+	// arm64:`REV16W`,-`AND`,-`UBFX`,-`AND`,-`ORR\tR[0-9]+<<8`
+	b1 := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
+	// arm64:-`ADD\tR[0-9]+<<8`
+	b2 := ((c & 0xff00ff00) >> 8) + ((c & 0x00ff00ff) << 8)
+	// arm64:-`EOR\tR[0-9]+<<8`
+	b3 := ((c & 0xff00ff00) >> 8) ^ ((c & 0x00ff00ff) << 8)
+	return b1, b2, b3
+}
+
+func shift(x uint32, y uint16, z uint8) uint64 {
+	// arm64:-`MOVWU`,-`LSR\t[$]32`
+	a := uint64(x) >> 32
+	// arm64:-`MOVHU`
+	b := uint64(y) >> 16
+	// arm64:-`MOVBU`
+	c := uint64(z) >> 8
+	// arm64:`MOVD\tZR`,-`ADD\tR[0-9]+>>16`,-`ADD\tR[0-9]+>>8`
+	return a + b + c
+}
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
new file mode 100644
index 0000000..8117a62
--- /dev/null
+++ b/test/codegen/bits.go
@@ -0,0 +1,357 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+/************************************
+ * 64-bit instructions
+ ************************************/
+
+func bitcheck64_constleft(a uint64) (n int) {
+	// amd64:"BTQ\t[$]63"
+	if a&(1<<63) != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]60"
+	if a&(1<<60) != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&(1<<0) != 0 {
+		return 1
+	}
+	return 0
+}
+
+func bitcheck64_constright(a [8]uint64) (n int) {
+	// amd64:"BTQ\t[$]63"
+	if (a[0]>>63)&1 != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]63"
+	if a[1]>>63 != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]63"
+	if a[2]>>63 == 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]60"
+	if (a[3]>>60)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]1"
+	if (a[4]>>1)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if (a[5]>>0)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]7"
+	if (a[6]>>5)&4 == 0 {
+		return 1
+	}
+	return 0
+}
+
+func bitcheck64_var(a, b uint64) (n int) {
+	// amd64:"BTQ"
+	if a&(1<<(b&63)) != 0 {
+		return 1
+	}
+	// amd64:"BTQ",-"BT.\t[$]0"
+	if (b>>(a&63))&1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+func bitcheck64_mask(a uint64) (n int) {
+	// amd64:"BTQ\t[$]63"
+	if a&0x8000000000000000 != 0 {
+		return 1
+	}
+	// amd64:"BTQ\t[$]59"
+	if a&0x800000000000000 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&0x1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+func biton64(a, b uint64) (n uint64) {
+	// amd64:"BTSQ"
+	n += b | (1 << (a & 63))
+
+	// amd64:"BTSQ\t[$]63"
+	n += a | (1 << 63)
+
+	// amd64:"BTSQ\t[$]60"
+	n += a | (1 << 60)
+
+	// amd64:"ORQ\t[$]1"
+	n += a | (1 << 0)
+
+	return n
+}
+
+func bitoff64(a, b uint64) (n uint64) {
+	// amd64:"BTRQ"
+	n += b &^ (1 << (a & 63))
+
+	// amd64:"BTRQ\t[$]63"
+	n += a &^ (1 << 63)
+
+	// amd64:"BTRQ\t[$]60"
+	n += a &^ (1 << 60)
+
+	// amd64:"ANDQ\t[$]-2"
+	n += a &^ (1 << 0)
+
+	return n
+}
+
+func bitcompl64(a, b uint64) (n uint64) {
+	// amd64:"BTCQ"
+	n += b ^ (1 << (a & 63))
+
+	// amd64:"BTCQ\t[$]63"
+	n += a ^ (1 << 63)
+
+	// amd64:"BTCQ\t[$]60"
+	n += a ^ (1 << 60)
+
+	// amd64:"XORQ\t[$]1"
+	n += a ^ (1 << 0)
+
+	return n
+}
+
+/************************************
+ * 32-bit instructions
+ ************************************/
+
+func bitcheck32_constleft(a uint32) (n int) {
+	// amd64:"BTL\t[$]31"
+	if a&(1<<31) != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]28"
+	if a&(1<<28) != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&(1<<0) != 0 {
+		return 1
+	}
+	return 0
+}
+
+func bitcheck32_constright(a [8]uint32) (n int) {
+	// amd64:"BTL\t[$]31"
+	if (a[0]>>31)&1 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]31"
+	if a[1]>>31 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]31"
+	if a[2]>>31 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]28"
+	if (a[3]>>28)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]1"
+	if (a[4]>>1)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if (a[5]>>0)&1 == 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]7"
+	if (a[6]>>5)&4 == 0 {
+		return 1
+	}
+	return 0
+}
+
+func bitcheck32_var(a, b uint32) (n int) {
+	// amd64:"BTL"
+	if a&(1<<(b&31)) != 0 {
+		return 1
+	}
+	// amd64:"BTL",-"BT.\t[$]0"
+	if (b>>(a&31))&1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+func bitcheck32_mask(a uint32) (n int) {
+	// amd64:"BTL\t[$]31"
+	if a&0x80000000 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]27"
+	if a&0x8000000 != 0 {
+		return 1
+	}
+	// amd64:"BTL\t[$]0"
+	if a&0x1 != 0 {
+		return 1
+	}
+	return 0
+}
+
+func biton32(a, b uint32) (n uint32) {
+	// amd64:"BTSL"
+	n += b | (1 << (a & 31))
+
+	// amd64:"BTSL\t[$]31"
+	n += a | (1 << 31)
+
+	// amd64:"BTSL\t[$]28"
+	n += a | (1 << 28)
+
+	// amd64:"ORL\t[$]1"
+	n += a | (1 << 0)
+
+	return n
+}
+
+func bitoff32(a, b uint32) (n uint32) {
+	// amd64:"BTRL"
+	n += b &^ (1 << (a & 31))
+
+	// amd64:"BTRL\t[$]31"
+	n += a &^ (1 << 31)
+
+	// amd64:"BTRL\t[$]28"
+	n += a &^ (1 << 28)
+
+	// amd64:"ANDL\t[$]-2"
+	n += a &^ (1 << 0)
+
+	return n
+}
+
+func bitcompl32(a, b uint32) (n uint32) {
+	// amd64:"BTCL"
+	n += b ^ (1 << (a & 31))
+
+	// amd64:"BTCL\t[$]31"
+	n += a ^ (1 << 31)
+
+	// amd64:"BTCL\t[$]28"
+	n += a ^ (1 << 28)
+
+	// amd64:"XORL\t[$]1"
+	n += a ^ (1 << 0)
+
+	return n
+}
+
+// check direct operation on memory with constant and shifted constant sources
+func bitOpOnMem(a []uint32, b, c, d uint32) {
+	// amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)`
+	a[0] &= 200
+	// amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)`
+	a[1] |= 220
+	// amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)`
+	a[2] ^= 240
+}
+
+func bitcheckMostNegative(b uint8) bool {
+	// amd64:"TESTB"
+	return b&0x80 == 0x80
+}
+
+// Check AND masking on arm64 (Issue #19857)
+
+func and_mask_1(a uint64) uint64 {
+	// arm64:`AND\t`
+	return a & ((1 << 63) - 1)
+}
+
+func and_mask_2(a uint64) uint64 {
+	// arm64:`AND\t`
+	return a & (1 << 63)
+}
+
+func and_mask_3(a, b uint32) (uint32, uint32) {
+	// arm/7:`BIC`,-`AND`
+	a &= 0xffffaaaa
+	// arm/7:`BFC`,-`AND`,-`BIC`
+	b &= 0xffc003ff
+	return a, b
+}
+
+// Check generation of arm64 BIC/EON/ORN instructions
+
+func op_bic(x, y uint32) uint32 {
+	// arm64:`BIC\t`,-`AND`
+	return x &^ y
+}
+
+func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 {
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	a[0] = x ^ (y ^ 0xffffffff)
+
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	a[1] = ^(y ^ z)
+
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	a[2] = x ^ ^z
+
+	// arm64:`EON\t`,-`EOR`,-`MVN`
+	return n ^ (m ^ 0xffffffffffffffff)
+}
+
+func op_orn(x, y uint32) uint32 {
+	// arm64:`ORN\t`,-`ORR`
+	return x | ^y
+}
+
+// check bitsets
+func bitSetPowerOf2Test(x int) bool {
+	// amd64:"BTL\t[$]3"
+	return x&8 == 8
+}
+
+func bitSetTest(x int) bool {
+	// amd64:"ANDQ\t[$]9, AX"
+	// amd64:"CMPQ\tAX, [$]9"
+	return x&9 == 9
+}
+
+// mask contiguous one bits
+func cont1Mask64U(x uint64) uint64 {
+	// s390x:"RISBGZ\t[$]16, [$]47, [$]0,"
+	return x & 0x0000ffffffff0000
+}
+
+// mask contiguous zero bits
+func cont0Mask64U(x uint64) uint64 {
+	// s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
+	return x & 0xffff00000000ffff
+}
+
+func issue44228a(a []int64, i int) bool {
+	// amd64: "BTQ", -"SHL"
+	return a[i>>6]&(1<<(i&63)) != 0
+}
+func issue44228b(a []int32, i int) bool {
+	// amd64: "BTL", -"SHL"
+	return a[i>>5]&(1<<(i&31)) != 0
+}
diff --git a/test/codegen/bool.go b/test/codegen/bool.go
new file mode 100644
index 0000000..929b1b4
--- /dev/null
+++ b/test/codegen/bool.go
@@ -0,0 +1,33 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to boolean simplifications/optimizations.
+
+func convertNeq0B(x uint8, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETNE"
+	b := x&1 != 0
+	return c && b
+}
+
+func convertNeq0W(x uint16, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETNE"
+	b := x&1 != 0
+	return c && b
+}
+
+func convertNeq0L(x uint32, c bool) bool {
+	// amd64:"ANDL\t[$]1",-"SETB"
+	b := x&1 != 0
+	return c && b
+}
+
+func convertNeq0Q(x uint64, c bool) bool {
+	// amd64:"ANDQ\t[$]1",-"SETB"
+	b := x&1 != 0
+	return c && b
+}
diff --git a/test/codegen/clobberdead.go b/test/codegen/clobberdead.go
new file mode 100644
index 0000000..f8d964c
--- /dev/null
+++ b/test/codegen/clobberdead.go
@@ -0,0 +1,32 @@
+// asmcheck -gcflags=-clobberdead
+
+// +build amd64
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type T [2]*int // contains pointers, so not SSA-able (locals are not registerized)
+
+var p1, p2, p3 T
+
+func F() {
+	// 3735936685 is 0xdeaddead
+	// x and y are clobbered at entry; z is not (it is a stack object).
+	// amd64:`MOVL\t\$3735936685, ""\.x`, `MOVL\t\$3735936685, ""\.y`, -`MOVL\t\$3735936685, ""\.z`
+	x, y, z := p1, p2, p3
+	addrTaken(&z)
+	// x is dead at the call (the value of x is loaded before the CALL), y is not
+	// amd64:`MOVL\t\$3735936685, ""\.x`, -`MOVL\t\$3735936685, ""\.y`
+	use(x)
+	// amd64:`MOVL\t\$3735936685, ""\.x`, `MOVL\t\$3735936685, ""\.y`
+	use(y)
+}
+
+//go:noinline
+func use(T) {}
+
+//go:noinline
+func addrTaken(*T) {}
diff --git a/test/codegen/clobberdeadreg.go b/test/codegen/clobberdeadreg.go
new file mode 100644
index 0000000..2a93c41
--- /dev/null
+++ b/test/codegen/clobberdeadreg.go
@@ -0,0 +1,33 @@
+// asmcheck -gcflags=-clobberdeadreg
+
+// +build amd64
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type S struct {
+	a, b, c, d, e, f int
+}
+
+func F(a, b, c int, d S) {
+	// -2401018187971961171 is 0xdeaddeaddeaddead
+	// amd64:`MOVQ\t\$-2401018187971961171, AX`, `MOVQ\t\$-2401018187971961171, BX`, `MOVQ\t\$-2401018187971961171, CX`
+	// amd64:`MOVQ\t\$-2401018187971961171, DX`, `MOVQ\t\$-2401018187971961171, SI`, `MOVQ\t\$-2401018187971961171, DI`
+	// amd64:`MOVQ\t\$-2401018187971961171, R8`, `MOVQ\t\$-2401018187971961171, R9`, `MOVQ\t\$-2401018187971961171, R10`
+	// amd64:`MOVQ\t\$-2401018187971961171, R11`, `MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`
+	// amd64:-`MOVQ\t\$-2401018187971961171, BP` // frame pointer is not clobbered
+	StackArgsCall([10]int{a, b, c})
+	// amd64:`MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`, `MOVQ\t\$-2401018187971961171, DX`
+	// amd64:-`MOVQ\t\$-2401018187971961171, AX`, -`MOVQ\t\$-2401018187971961171, R11` // register args are not clobbered
+	RegArgsCall(a, b, c, d)
+}
+
+//go:noinline
+func StackArgsCall([10]int) {}
+
+//go:noinline
+//go:registerparams
+func RegArgsCall(int, int, int, S) {}
diff --git a/test/codegen/compare_and_branch.go b/test/codegen/compare_and_branch.go
new file mode 100644
index 0000000..f751506
--- /dev/null
+++ b/test/codegen/compare_and_branch.go
@@ -0,0 +1,206 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+//go:noinline
+func dummy() {}
+
+// Signed 64-bit compare-and-branch.
+func si64(x, y chan int64) {
+	// s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x < <-y {
+		dummy()
+	}
+
+	// s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, "
+	for <-x == <-y {
+		dummy()
+	}
+}
+
+// Signed 64-bit compare-and-branch with 8-bit immediate.
+func si64x8() {
+	// s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, "
+	for i := int64(0); i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, "
+	for i := int64(0); i > -129; i-- {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, "
+	for i := int64(0); i >= 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, "
+	for i := int64(0); i <= -129; i-- {
+		dummy()
+	}
+}
+
+// Unsigned 64-bit compare-and-branch.
+func ui64(x, y chan uint64) {
+	// s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x > <-y {
+		dummy()
+	}
+
+	// s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, "
+	for <-x != <-y {
+		dummy()
+	}
+}
+
+// Unsigned 64-bit comparison with 8-bit immediate.
+func ui64x8() {
+	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, "
+	for i := uint64(0); i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, "
+	for i := uint64(0); i < 256; i++ {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, "
+	for i := uint64(0); i >= 256; i-- {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, "
+	for i := uint64(1024); i > 0; i-- {
+		dummy()
+	}
+}
+
+// Signed 32-bit compare-and-branch.
+func si32(x, y chan int32) {
+	// s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x < <-y {
+		dummy()
+	}
+
+	// s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, "
+	for <-x == <-y {
+		dummy()
+	}
+}
+
+// Signed 32-bit compare-and-branch with 8-bit immediate.
+func si32x8() {
+	// s390x:"CIJ\t[$]12, R[0-9]+, [$]127, "
+	for i := int32(0); i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, "
+	for i := int32(0); i > -129; i-- {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]2, R[0-9]+, [$]127, "
+	for i := int32(0); i >= 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, "
+	for i := int32(0); i <= -129; i-- {
+		dummy()
+	}
+}
+
+// Unsigned 32-bit compare-and-branch.
+func ui32(x, y chan uint32) {
+	// s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+	for <-x > <-y {
+		dummy()
+	}
+
+	// s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, "
+	for <-x != <-y {
+		dummy()
+	}
+}
+
+// Unsigned 32-bit comparison with 8-bit immediate.
+func ui32x8() {
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, "
+	for i := uint32(0); i < 128; i++ {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, "
+	for i := uint32(0); i < 256; i++ {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, "
+	for i := uint32(0); i >= 256; i-- {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, "
+	for i := uint32(1024); i > 0; i-- {
+		dummy()
+	}
+}
+
+// Signed 64-bit comparison with unsigned 8-bit immediate.
+func si64xu8(x chan int64) {
+	// s390x:"CLGIJ\t[$]8, R[0-9]+, [$]128, "
+	for <-x == 128 {
+		dummy()
+	}
+
+	// s390x:"CLGIJ\t[$]6, R[0-9]+, [$]255, "
+	for <-x != 255 {
+		dummy()
+	}
+}
+
+// Signed 32-bit comparison with unsigned 8-bit immediate.
+func si32xu8(x chan int32) {
+	// s390x:"CLIJ\t[$]8, R[0-9]+, [$]255, "
+	for <-x == 255 {
+		dummy()
+	}
+
+	// s390x:"CLIJ\t[$]6, R[0-9]+, [$]128, "
+	for <-x != 128 {
+		dummy()
+	}
+}
+
+// Unsigned 64-bit comparison with signed 8-bit immediate.
+func ui64xu8(x chan uint64) {
+	// s390x:"CGIJ\t[$]8, R[0-9]+, [$]-1, "
+	for <-x == ^uint64(0) {
+		dummy()
+	}
+
+	// s390x:"CGIJ\t[$]6, R[0-9]+, [$]-128, "
+	for <-x != ^uint64(127) {
+		dummy()
+	}
+}
+
+// Unsigned 32-bit comparison with signed 8-bit immediate.
+func ui32xu8(x chan uint32) {
+	// s390x:"CIJ\t[$]8, R[0-9]+, [$]-128, "
+	for <-x == ^uint32(127) {
+		dummy()
+	}
+
+	// s390x:"CIJ\t[$]6, R[0-9]+, [$]-1, "
+	for <-x != ^uint32(0) {
+		dummy()
+	}
+}
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
new file mode 100644
index 0000000..17dcd94
--- /dev/null
+++ b/test/codegen/comparisons.go
@@ -0,0 +1,540 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "unsafe"
+
+// This file contains code generation tests related to the comparison
+// operators.
+
+// -------------- //
+//    Equality    //
+// -------------- //
+
+// Check that comparisons against constant strings use 2/4/8 byte compares
+
+func CompareString1(s string) bool {
+	// amd64:`CMPW\t\(.*\), [$]`
+	// arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
+	// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
+	// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
+	return s == "xx"
+}
+
+func CompareString2(s string) bool {
+	// amd64:`CMPL\t\(.*\), [$]`
+	// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
+	return s == "xxxx"
+}
+
+func CompareString3(s string) bool {
+	// amd64:`CMPQ\t\(.*\), [A-Z]`
+	// arm64:-`CMPW\t`
+	// ppc64:-`CMPW\t`
+	// ppc64le:-`CMPW\t`
+	// s390x:-`CMPW\t`
+	return s == "xxxxxxxx"
+}
+
+// Check that array comparisons use 2/4/8 byte compares
+
+func CompareArray1(a, b [2]byte) bool {
+	// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	// arm64:-`MOVBU\t`
+	// ppc64le:-`MOVBZ\t`
+	// s390x:-`MOVBZ\t`
+	return a == b
+}
+
+func CompareArray2(a, b [3]uint16) bool {
+	// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+func CompareArray3(a, b [3]int16) bool {
+	// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CMPW\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+func CompareArray4(a, b [12]int8) bool {
+	// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	// amd64:`CMPL\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+func CompareArray5(a, b [15]byte) bool {
+	// amd64:`CMPQ\t""[.+_a-z0-9]+\(SP\), [A-Z]`
+	return a == b
+}
+
+// This was a TODO in mapaccess1_faststr
+func CompareArray6(a, b unsafe.Pointer) bool {
+	// amd64:`CMPL\t\(.*\), [A-Z]`
+	// arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	// s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
+	return *((*[4]byte)(a)) != *((*[4]byte)(b))
+}
+
+// -------------- //
+//    Ordering    //
+// -------------- //
+
+// Test that LEAQ/ADDQconst are folded into SETx ops
+
+var r bool
+
+func CmpFold(x uint32) {
+	// amd64:`SETHI\t.*\(SB\)`
+	r = x > 4
+}
+
+// Test that direct comparisons with memory are generated when
+// possible
+
+func CmpMem1(p int, q *int) bool {
+	// amd64:`CMPQ\t\(.*\), [A-Z]`
+	return p < *q
+}
+
+func CmpMem2(p *int, q int) bool {
+	// amd64:`CMPQ\t\(.*\), [A-Z]`
+	return *p < q
+}
+
+func CmpMem3(p *int) bool {
+	// amd64:`CMPQ\t\(.*\), [$]7`
+	return *p < 7
+}
+
+func CmpMem4(p *int) bool {
+	// amd64:`CMPQ\t\(.*\), [$]7`
+	return 7 < *p
+}
+
+func CmpMem5(p **int) {
+	// amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
+	*p = nil
+}
+
+func CmpMem6(a []int) int {
+	// 386:`CMPL\s8\([A-Z]+\),`
+	// amd64:`CMPQ\s16\([A-Z]+\),`
+	if a[1] > a[2] {
+		return 1
+	} else {
+		return 2
+	}
+}
+
+// Check tbz/tbnz are generated when comparing against zero on arm64
+
+func CmpZero1(a int32, ptr *int) {
+	if a < 0 { // arm64:"TBZ"
+		*ptr = 0
+	}
+}
+
+func CmpZero2(a int64, ptr *int) {
+	if a < 0 { // arm64:"TBZ"
+		*ptr = 0
+	}
+}
+
+func CmpZero3(a int32, ptr *int) {
+	if a >= 0 { // arm64:"TBNZ"
+		*ptr = 0
+	}
+}
+
+func CmpZero4(a int64, ptr *int) {
+	if a >= 0 { // arm64:"TBNZ"
+		*ptr = 0
+	}
+}
+
+func CmpToZero(a, b, d int32, e, f int64) int32 {
+	// arm:`TST`,-`AND`
+	// arm64:`TSTW`,-`AND`
+	// 386:`TESTL`,-`ANDL`
+	// amd64:`TESTL`,-`ANDL`
+	c0 := a&b < 0
+	// arm:`CMN`,-`ADD`
+	// arm64:`CMNW`,-`ADD`
+	c1 := a+b < 0
+	// arm:`TEQ`,-`EOR`
+	c2 := a^b < 0
+	// arm64:`TST`,-`AND`
+	// amd64:`TESTQ`,-`ANDQ`
+	c3 := e&f < 0
+	// arm64:`CMN`,-`ADD`
+	c4 := e+f < 0
+	// not optimized to single CMNW/CMN due to further use of b+d
+	// arm64:`ADD`,-`CMNW`
+	// arm:`ADD`,-`CMN`
+	c5 := b+d == 0
+	// not optimized to single TSTW/TST due to further use of a&d
+	// arm64:`AND`,-`TSTW`
+	// arm:`AND`,-`TST`
+	// 386:`ANDL`
+	c6 := a&d >= 0
+	// arm64:`TST\sR[0-9]+<<3,\sR[0-9]+`
+	c7 := e&(f<<3) < 0
+	// arm64:`CMN\sR[0-9]+<<3,\sR[0-9]+`
+	c8 := e+(f<<3) < 0
+	if c0 {
+		return 1
+	} else if c1 {
+		return 2
+	} else if c2 {
+		return 3
+	} else if c3 {
+		return 4
+	} else if c4 {
+		return 5
+	} else if c5 {
+		return b + d
+	} else if c6 {
+		return a & d
+	} else if c7 {
+		return 7
+	} else if c8 {
+		return 8
+	} else {
+		return 0
+	}
+}
+
+func CmpLogicalToZero(a, b, c uint32, d, e uint64) uint64 {
+
+	// ppc64:"ANDCC",-"CMPW"
+	// ppc64le:"ANDCC",-"CMPW"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if a&63 == 0 {
+		return 1
+	}
+
+	// ppc64:"ANDCC",-"CMP"
+	// ppc64le:"ANDCC",-"CMP"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if d&255 == 0 {
+		return 1
+	}
+
+	// ppc64:"ANDCC",-"CMP"
+	// ppc64le:"ANDCC",-"CMP"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if d&e == 0 {
+		return 1
+	}
+	// ppc64:"ORCC",-"CMP"
+	// ppc64le:"ORCC",-"CMP"
+	// wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if d|e == 0 {
+		return 1
+	}
+
+	// ppc64:"XORCC",-"CMP"
+	// ppc64le:"XORCC",-"CMP"
+	// wasm:"I64Eqz","I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+	if e^d == 0 {
+		return 1
+	}
+	return 0
+}
+
+// The following CmpToZero_ex* check that cmp|cmn with bmi|bpl are generated for
+// 'comparing to zero' expressions
+
+// var + const
+// 'x-const' might be canonicalized to 'x+(-const)', so we check both
+// CMN and CMP for subtraction expressions to make the pattern robust.
+func CmpToZero_ex1(a int64, e int32) int {
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+3 < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if a+5 <= 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+13 >= 0 {
+		return 2
+	}
+
+	// arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
+	if a-7 < 0 {
+		return 3
+	}
+
+	// arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
+	if a-11 >= 0 {
+		return 4
+	}
+
+	// arm64:`CMP|CMN`,-`(ADD|SUB)`,`BEQ`,`(BMI|BPL)`
+	if a-19 > 0 {
+		return 4
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if e+3 < 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if e+13 >= 0 {
+		return 6
+	}
+
+	// arm64:`CMPW|CMNW`,`(BMI|BPL)`
+	// arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
+	if e-7 < 0 {
+		return 7
+	}
+
+	// arm64:`CMPW|CMNW`,`(BMI|BPL)`
+	// arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
+	if e-11 >= 0 {
+		return 8
+	}
+
+	return 0
+}
+
+// var + var
+// TODO: optimize 'var - var'
+func CmpToZero_ex2(a, b, c int64, e, f, g int32) int {
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if a+b < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if a+c <= 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+	if b+c >= 0 {
+		return 2
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if e+f < 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+	// arm:`CMN`,-`ADD`,`(BMI|BPL)`
+	if f+g >= 0 {
+		return 6
+	}
+	return 0
+}
+
+// var + var*var
+func CmpToZero_ex3(a, b, c, d int64, e, f, g, h int32) int {
+	// arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+	if a+b*c < 0 {
+		return 1
+	}
+
+	// arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+	if b+c*d >= 0 {
+		return 2
+	}
+
+	// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+	// arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if e+f*g > 0 {
+		return 5
+	}
+
+	// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+	// arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if f+g*h <= 0 {
+		return 6
+	}
+	return 0
+}
+
+// var - var*var
+func CmpToZero_ex4(a, b, c, d int64, e, f, g, h int32) int {
+	// arm64:`CMP`,-`MSUB`,`MUL`,`BEQ`,`(BMI|BPL)`
+	if a-b*c > 0 {
+		return 1
+	}
+
+	// arm64:`CMP`,-`MSUB`,`MUL`,`(BMI|BPL)`
+	if b-c*d >= 0 {
+		return 2
+	}
+
+	// arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+	if e-f*g < 0 {
+		return 5
+	}
+
+	// arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+	if f-g*h >= 0 {
+		return 6
+	}
+	return 0
+}
+
+func CmpToZero_ex5(e, f int32, u uint32) int {
+	// arm:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+	if e+f<<1 > 0 {
+		return 1
+	}
+
+	// arm:`CMP`,-`SUB`,`(BMI|BPL)`
+	if f-int32(u>>2) >= 0 {
+		return 2
+	}
+	return 0
+}
+func UintLtZero(a uint8, b uint16, c uint32, d uint64) int {
+	// amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCC|JCS)`
+	// arm64: -`(CMPW|CMP|BHS|BLO)`
+	if a < 0 || b < 0 || c < 0 || d < 0 {
+		return 1
+	}
+	return 0
+}
+
+func UintGeqZero(a uint8, b uint16, c uint32, d uint64) int {
+	// amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCS|JCC)`
+	// arm64: -`(CMPW|CMP|BLO|BHS)`
+	if a >= 0 || b >= 0 || c >= 0 || d >= 0 {
+		return 1
+	}
+	return 0
+}
+
+func UintGtZero(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLS|BHI)`
+	if a > 0 || b > 0 || c > 0 || d > 0 {
+		return 1
+	}
+	return 0
+}
+
+func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHI|BLS)`
+	if a <= 0 || b <= 0 || c <= 0 || d <= 0 {
+		return 1
+	}
+	return 0
+}
+
+func UintLtOne(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHS|BLO)`
+	if a < 1 || b < 1 || c < 1 || d < 1 {
+		return 1
+	}
+	return 0
+}
+
+func UintGeqOne(a uint8, b uint16, c uint32, d uint64) int {
+	// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLO|BHS)`
+	if a >= 1 || b >= 1 || c >= 1 || d >= 1 {
+		return 1
+	}
+	return 0
+}
+
+func CmpToZeroU_ex1(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < a {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < b {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < c {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if 0 < d {
+		return 1
+	}
+	return 0
+}
+
+func CmpToZeroU_ex2(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LeU"
+	if a <= 0 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if b <= 0 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if c <= 0 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if d <= 0 {
+		return 1
+	}
+	return 0
+}
+
+func CmpToOneU_ex1(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LtU"
+	if a < 1 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if b < 1 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if c < 1 {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LtU"
+	if d < 1 {
+		return 1
+	}
+	return 0
+}
+
+func CmpToOneU_ex2(a uint8, b uint16, c uint32, d uint64) int {
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= a {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= b {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= c {
+		return 1
+	}
+	// wasm:"I64Eqz",-"I64LeU"
+	if 1 <= d {
+		return 1
+	}
+	return 0
+}
diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go
new file mode 100644
index 0000000..707f223
--- /dev/null
+++ b/test/codegen/condmove.go
@@ -0,0 +1,402 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func cmovint(c int) int {
+	x := c + 4
+	if x < 0 {
+		x = 182
+	}
+	// amd64:"CMOVQLT"
+	// arm64:"CSEL\tLT"
+	// wasm:"Select"
+	return x
+}
+
+func cmovchan(x, y chan int) chan int {
+	if x != y {
+		x = y
+	}
+	// amd64:"CMOVQNE"
+	// arm64:"CSEL\tNE"
+	// wasm:"Select"
+	return x
+}
+
+func cmovuintptr(x, y uintptr) uintptr {
+	if x < y {
+		x = -y
+	}
+	// amd64:"CMOVQ(HI|CS)"
+	// arm64:"CSNEG\tLS"
+	// wasm:"Select"
+	return x
+}
+
+func cmov32bit(x, y uint32) uint32 {
+	if x < y {
+		x = -y
+	}
+	// amd64:"CMOVL(HI|CS)"
+	// arm64:"CSNEG\t(LS|HS)"
+	// wasm:"Select"
+	return x
+}
+
+func cmov16bit(x, y uint16) uint16 {
+	if x < y {
+		x = -y
+	}
+	// amd64:"CMOVW(HI|CS)"
+	// arm64:"CSNEG\t(LS|HS)"
+	// wasm:"Select"
+	return x
+}
+
+// Floating point comparison. For EQ/NE, we must
+// generate special code to handle NaNs.
+func cmovfloateq(x, y float64) int {
+	a := 128
+	if x == y {
+		a = 256
+	}
+	// amd64:"CMOVQNE","CMOVQPC"
+	// arm64:"CSEL\tEQ"
+	// wasm:"Select"
+	return a
+}
+
+func cmovfloatne(x, y float64) int {
+	a := 128
+	if x != y {
+		a = 256
+	}
+	// amd64:"CMOVQNE","CMOVQPS"
+	// arm64:"CSEL\tNE"
+	// wasm:"Select"
+	return a
+}
+
+//go:noinline
+func frexp(f float64) (frac float64, exp int) {
+	return 1.0, 4
+}
+
+//go:noinline
+func ldexp(frac float64, exp int) float64 {
+	return 1.0
+}
+
+// Generate a CMOV with a floating comparison and integer move.
+func cmovfloatint2(x, y float64) float64 {
+	yfr, yexp := 4.0, 5
+
+	r := x
+	for r >= y {
+		rfr, rexp := frexp(r)
+		if rfr < yfr {
+			rexp = rexp - 1
+		}
+		// amd64:"CMOVQHI"
+		// arm64:"CSEL\tMI"
+		// wasm:"Select"
+		r = r - ldexp(y, rexp-yexp)
+	}
+	return r
+}
+
+func cmovloaded(x [4]int, y int) int {
+	if x[2] != 0 {
+		y = x[2]
+	} else {
+		y = y >> 2
+	}
+	// amd64:"CMOVQNE"
+	// arm64:"CSEL\tNE"
+	// wasm:"Select"
+	return y
+}
+
+func cmovuintptr2(x, y uintptr) uintptr {
+	a := x * 2
+	if a == 0 {
+		a = 256
+	}
+	// amd64:"CMOVQEQ"
+	// arm64:"CSEL\tEQ"
+	// wasm:"Select"
+	return a
+}
+
+// Floating point CMOVs are not supported by amd64/arm64
+func cmovfloatmove(x, y int) float64 {
+	a := 1.0
+	if x <= y {
+		a = 2.0
+	}
+	// amd64:-"CMOV"
+	// arm64:-"CSEL"
+	// wasm:-"Select"
+	return a
+}
+
+// On amd64, the following patterns trigger comparison inversion.
+// Test that we correctly invert the CMOV condition
+var gsink int64
+var gusink uint64
+
+func cmovinvert1(x, y int64) int64 {
+	if x < gsink {
+		y = -y
+	}
+	// amd64:"CMOVQGT"
+	return y
+}
+func cmovinvert2(x, y int64) int64 {
+	if x <= gsink {
+		y = -y
+	}
+	// amd64:"CMOVQGE"
+	return y
+}
+func cmovinvert3(x, y int64) int64 {
+	if x == gsink {
+		y = -y
+	}
+	// amd64:"CMOVQEQ"
+	return y
+}
+func cmovinvert4(x, y int64) int64 {
+	if x != gsink {
+		y = -y
+	}
+	// amd64:"CMOVQNE"
+	return y
+}
+func cmovinvert5(x, y uint64) uint64 {
+	if x > gusink {
+		y = -y
+	}
+	// amd64:"CMOVQCS"
+	return y
+}
+func cmovinvert6(x, y uint64) uint64 {
+	if x >= gusink {
+		y = -y
+	}
+	// amd64:"CMOVQLS"
+	return y
+}
+
+func cmovload(a []int, i int, b bool) int {
+	if b {
+		i++
+	}
+	// See issue 26306
+	// amd64:-"CMOVQNE"
+	return a[i]
+}
+
+func cmovstore(a []int, i int, b bool) {
+	if b {
+		i++
+	}
+	// amd64:"CMOVQNE"
+	a[i] = 7
+}
+
+var r0, r1, r2, r3, r4, r5 int
+
+func cmovinc(cond bool, a, b, c int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = a
+	} else {
+		x0 = b + 1
+	}
+	// arm64:"CSINC\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = b + 1
+	} else {
+		x1 = a
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r1 = x1
+
+	if cond {
+		c++
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r2 = c
+}
+
+func cmovinv(cond bool, a, b int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = a
+	} else {
+		x0 = ^b
+	}
+	// arm64:"CSINV\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = ^b
+	} else {
+		x1 = a
+	}
+	// arm64:"CSINV\tEQ", -"CSEL"
+	r1 = x1
+}
+
+func cmovneg(cond bool, a, b, c int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = a
+	} else {
+		x0 = -b
+	}
+	// arm64:"CSNEG\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = -b
+	} else {
+		x1 = a
+	}
+	// arm64:"CSNEG\tEQ", -"CSEL"
+	r1 = x1
+}
+
+func cmovsetm(cond bool, x int) {
+	var x0, x1 int
+
+	if cond {
+		x0 = -1
+	} else {
+		x0 = 0
+	}
+	// arm64:"CSETM\tNE", -"CSEL"
+	r0 = x0
+
+	if cond {
+		x1 = 0
+	} else {
+		x1 = -1
+	}
+	// arm64:"CSETM\tEQ", -"CSEL"
+	r1 = x1
+}
+
+func cmovFcmp0(s, t float64, a, b int) {
+	var x0, x1, x2, x3, x4, x5 int
+
+	if s < t {
+		x0 = a
+	} else {
+		x0 = b + 1
+	}
+	// arm64:"CSINC\tMI", -"CSEL"
+	r0 = x0
+
+	if s <= t {
+		x1 = a
+	} else {
+		x1 = ^b
+	}
+	// arm64:"CSINV\tLS", -"CSEL"
+	r1 = x1
+
+	if s > t {
+		x2 = a
+	} else {
+		x2 = -b
+	}
+	// arm64:"CSNEG\tMI", -"CSEL"
+	r2 = x2
+
+	if s >= t {
+		x3 = -1
+	} else {
+		x3 = 0
+	}
+	// arm64:"CSETM\tLS", -"CSEL"
+	r3 = x3
+
+	if s == t {
+		x4 = a
+	} else {
+		x4 = b + 1
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r4 = x4
+
+	if s != t {
+		x5 = a
+	} else {
+		x5 = b + 1
+	}
+	// arm64:"CSINC\tNE", -"CSEL"
+	r5 = x5
+}
+
+func cmovFcmp1(s, t float64, a, b int) {
+	var x0, x1, x2, x3, x4, x5 int
+
+	if s < t {
+		x0 = b + 1
+	} else {
+		x0 = a
+	}
+	// arm64:"CSINC\tPL", -"CSEL"
+	r0 = x0
+
+	if s <= t {
+		x1 = ^b
+	} else {
+		x1 = a
+	}
+	// arm64:"CSINV\tHI", -"CSEL"
+	r1 = x1
+
+	if s > t {
+		x2 = -b
+	} else {
+		x2 = a
+	}
+	// arm64:"CSNEG\tPL", -"CSEL"
+	r2 = x2
+
+	if s >= t {
+		x3 = 0
+	} else {
+		x3 = -1
+	}
+	// arm64:"CSETM\tHI", -"CSEL"
+	r3 = x3
+
+	if s == t {
+		x4 = b + 1
+	} else {
+		x4 = a
+	}
+	// arm64:"CSINC\tNE", -"CSEL"
+	r4 = x4
+
+	if s != t {
+		x5 = b + 1
+	} else {
+		x5 = a
+	}
+	// arm64:"CSINC\tEQ", -"CSEL"
+	r5 = x5
+}
diff --git a/test/codegen/copy.go b/test/codegen/copy.go
new file mode 100644
index 0000000..ea8a01f
--- /dev/null
+++ b/test/codegen/copy.go
@@ -0,0 +1,160 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "runtime"
+
+// Check small copies are replaced with moves.
+
+func movesmall4() {
+	x := [...]byte{1, 2, 3, 4}
+	// 386:-".*memmove"
+	// amd64:-".*memmove"
+	// arm:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	copy(x[1:], x[:])
+}
+
+func movesmall7() {
+	x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+	// 386:-".*memmove"
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	copy(x[1:], x[:])
+}
+
+func movesmall16() {
+	x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+	// amd64:-".*memmove"
+	// ppc64:".*memmove"
+	// ppc64le:".*memmove"
+	copy(x[1:], x[:])
+}
+
+var x [256]byte
+
+// Check that large disjoint copies are replaced with moves.
+
+func moveDisjointStack32() {
+	var s [32]byte
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	// ppc64le/power8:"LXVD2X",-"ADD",-"BC"
+	// ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+	copy(s[:], x[:32])
+	runtime.KeepAlive(&s)
+}
+
+func moveDisjointStack64() {
+	var s [96]byte
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	// ppc64le/power8:"LXVD2X","ADD","BC"
+	// ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+	copy(s[:], x[:96])
+	runtime.KeepAlive(&s)
+}
+
+func moveDisjointStack() {
+	var s [256]byte
+	// s390x:-".*memmove"
+	// amd64:-".*memmove"
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	// ppc64le/power8:"LXVD2X"
+	// ppc64le/power9:"LXV",-"LXVD2X"
+	copy(s[:], x[:])
+	runtime.KeepAlive(&s)
+}
+
+func moveDisjointArg(b *[256]byte) {
+	var s [256]byte
+	// s390x:-".*memmove"
+	// amd64:-".*memmove"
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	// ppc64le/power8:"LXVD2X"
+	// ppc64le/power9:"LXV",-"LXVD2X"
+	copy(s[:], b[:])
+	runtime.KeepAlive(&s)
+}
+
+func moveDisjointNoOverlap(a *[256]byte) {
+	// s390x:-".*memmove"
+	// amd64:-".*memmove"
+	// ppc64:-".*memmove"
+	// ppc64le:-".*memmove"
+	// ppc64le/power8:"LXVD2X"
+	// ppc64le/power9:"LXV",-"LXVD2X"
+	copy(a[:], a[128:])
+}
+
+// Check arch-specific memmove lowering. See issue 41662 for details.
+
+func moveArchLowering1(b []byte, x *[1]byte) {
+	_ = b[1]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	copy(b, x[:])
+}
+
+func moveArchLowering2(b []byte, x *[2]byte) {
+	_ = b[2]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	copy(b, x[:])
+}
+
+func moveArchLowering4(b []byte, x *[4]byte) {
+	_ = b[4]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	copy(b, x[:])
+}
+
+func moveArchLowering8(b []byte, x *[8]byte) {
+	_ = b[8]
+	// amd64:-".*memmove"
+	// arm64:-".*memmove"
+	copy(b, x[:])
+}
+
+func moveArchLowering16(b []byte, x *[16]byte) {
+	_ = b[16]
+	// amd64:-".*memmove"
+	copy(b, x[:])
+}
+
+// Check that no branches are generated when the pointers are [not] equal.
+
+func ptrEqual() {
+	// amd64:-"JEQ",-"JNE"
+	// ppc64:-"BEQ",-"BNE"
+	// ppc64le:-"BEQ",-"BNE"
+	// s390x:-"BEQ",-"BNE"
+	copy(x[:], x[:])
+}
+
+func ptrOneOffset() {
+	// amd64:-"JEQ",-"JNE"
+	// ppc64:-"BEQ",-"BNE"
+	// ppc64le:-"BEQ",-"BNE"
+	// s390x:-"BEQ",-"BNE"
+	copy(x[1:], x[:])
+}
+
+func ptrBothOffset() {
+	// amd64:-"JEQ",-"JNE"
+	// ppc64:-"BEQ",-"BNE"
+	// ppc64le:-"BEQ",-"BNE"
+	// s390x:-"BEQ",-"BNE"
+	copy(x[1:], x[2:])
+}
diff --git a/test/codegen/floats.go b/test/codegen/floats.go
new file mode 100644
index 0000000..397cbb8
--- /dev/null
+++ b/test/codegen/floats.go
@@ -0,0 +1,156 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to arithmetic
+// simplifications and optimizations on float types.
+// For codegen tests on integer types, see arithmetic.go.
+
+// --------------------- //
+//    Strength-reduce    //
+// --------------------- //
+
+func Mul2(f float64) float64 {
+	// 386/sse2:"ADDSD",-"MULSD"
+	// amd64:"ADDSD",-"MULSD"
+	// arm/7:"ADDD",-"MULD"
+	// arm64:"FADDD",-"FMULD"
+	// ppc64:"FADD",-"FMUL"
+	// ppc64le:"FADD",-"FMUL"
+	return f * 2.0
+}
+
+func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
+	// 386/sse2:"MULSD",-"DIVSD"
+	// amd64:"MULSD",-"DIVSD"
+	// arm/7:"MULD",-"DIVD"
+	// arm64:"FMULD",-"FDIVD"
+	// ppc64:"FMUL",-"FDIV"
+	// ppc64le:"FMUL",-"FDIV"
+	x := f1 / 16.0
+
+	// 386/sse2:"MULSD",-"DIVSD"
+	// amd64:"MULSD",-"DIVSD"
+	// arm/7:"MULD",-"DIVD"
+	// arm64:"FMULD",-"FDIVD"
+	// ppc64:"FMUL",-"FDIV"
+	// ppc64le:"FMUL",-"FDIV"
+	y := f2 / 0.125
+
+	// 386/sse2:"ADDSD",-"DIVSD",-"MULSD"
+	// amd64:"ADDSD",-"DIVSD",-"MULSD"
+	// arm/7:"ADDD",-"MULD",-"DIVD"
+	// arm64:"FADDD",-"FMULD",-"FDIVD"
+	// ppc64:"FADD",-"FMUL",-"FDIV"
+	// ppc64le:"FADD",-"FMUL",-"FDIV"
+	z := f3 / 0.5
+
+	return x, y, z
+}
+
+func indexLoad(b0 []float32, b1 float32, idx int) float32 {
+	// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
+	return b0[idx] * b1
+}
+
+func indexStore(b0 []float64, b1 float64, idx int) {
+	// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
+	b0[idx] = b1
+}
+
+// ----------- //
+//    Fused    //
+// ----------- //
+
+func FusedAdd32(x, y, z float32) float32 {
+	// s390x:"FMADDS\t"
+	// ppc64:"FMADDS\t"
+	// ppc64le:"FMADDS\t"
+	// arm64:"FMADDS"
+	return x*y + z
+}
+
+func FusedSub32_a(x, y, z float32) float32 {
+	// s390x:"FMSUBS\t"
+	// ppc64:"FMSUBS\t"
+	// ppc64le:"FMSUBS\t"
+	return x*y - z
+}
+
+func FusedSub32_b(x, y, z float32) float32 {
+	// arm64:"FMSUBS"
+	return z - x*y
+}
+
+func FusedAdd64(x, y, z float64) float64 {
+	// s390x:"FMADD\t"
+	// ppc64:"FMADD\t"
+	// ppc64le:"FMADD\t"
+	// arm64:"FMADDD"
+	return x*y + z
+}
+
+func FusedSub64_a(x, y, z float64) float64 {
+	// s390x:"FMSUB\t"
+	// ppc64:"FMSUB\t"
+	// ppc64le:"FMSUB\t"
+	return x*y - z
+}
+
+func FusedSub64_b(x, y, z float64) float64 {
+	// arm64:"FMSUBD"
+	return z - x*y
+}
+
+func Cmp(f float64) bool {
+	// arm64:"FCMPD","(BGT|BLE|BMI|BPL)",-"CSET\tGT",-"CBZ"
+	return f > 4 || f < -4
+}
+
+func CmpZero64(f float64) bool {
+	// s390x:"LTDBR",-"FCMPU"
+	return f <= 0
+}
+
+func CmpZero32(f float32) bool {
+	// s390x:"LTEBR",-"CEBR"
+	return f <= 0
+}
+
+func CmpWithSub(a float64, b float64) bool {
+	f := a - b
+	// s390x:-"LTDBR"
+	return f <= 0
+}
+
+func CmpWithAdd(a float64, b float64) bool {
+	f := a + b
+	// s390x:-"LTDBR"
+	return f <= 0
+}
+
+// ---------------- //
+//    Non-floats    //
+// ---------------- //
+
+// We should make sure that the compiler doesn't generate floating point
+// instructions for non-float operations on Plan 9, because floating point
+// operations are not allowed in the note handler.
+
+func ArrayZero() [16]byte {
+	// amd64:"MOVUPS"
+	// plan9/amd64/:-"MOVUPS"
+	var a [16]byte
+	return a
+}
+
+func ArrayCopy(a [16]byte) (b [16]byte) {
+	// amd64:"MOVUPS"
+	// plan9/amd64/:-"MOVUPS"
+	b = a
+	return
+}
diff --git a/test/codegen/fuse.go b/test/codegen/fuse.go
new file mode 100644
index 0000000..79dd337
--- /dev/null
+++ b/test/codegen/fuse.go
@@ -0,0 +1,197 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Notes:
+// - these examples use channels to provide a source of
+//   unknown values that cannot be optimized away
+// - these examples use for loops to force branches
+//   backward (predicted taken)
+
+// ---------------------------------- //
+// signed integer range (conjunction) //
+// ---------------------------------- //
+
+func si1c(c <-chan int64) {
+	// amd64:"CMPQ\t.+, [$]256"
+	// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255"
+	for x := <-c; x >= 0 && x < 256; x = <-c {
+	}
+}
+
+func si2c(c <-chan int32) {
+	// amd64:"CMPL\t.+, [$]256"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
+	for x := <-c; x >= 0 && x < 256; x = <-c {
+	}
+}
+
+func si3c(c <-chan int16) {
+	// amd64:"CMPW\t.+, [$]256"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
+	for x := <-c; x >= 0 && x < 256; x = <-c {
+	}
+}
+
+func si4c(c <-chan int8) {
+	// amd64:"CMPB\t.+, [$]10"
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]10"
+	for x := <-c; x >= 0 && x < 10; x = <-c {
+	}
+}
+
+func si5c(c <-chan int64) {
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x < 256 && x > 4; x = <-c {
+	}
+}
+
+func si6c(c <-chan int32) {
+	// amd64:"CMPL\t.+, [$]255","DECL\t"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
+	for x := <-c; x > 0 && x <= 256; x = <-c {
+	}
+}
+
+func si7c(c <-chan int16) {
+	// amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]60","ADDW\t[$]10,"
+	for x := <-c; x >= -10 && x <= 50; x = <-c {
+	}
+}
+
+func si8c(c <-chan int8) {
+	// amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]126","ADDW\t[$]126,"
+	for x := <-c; x >= -126 && x < 0; x = <-c {
+	}
+}
+
+// ---------------------------------- //
+// signed integer range (disjunction) //
+// ---------------------------------- //
+
+func si1d(c <-chan int64) {
+	// amd64:"CMPQ\t.+, [$]256"
+	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255"
+	for x := <-c; x < 0 || x >= 256; x = <-c {
+	}
+}
+
+func si2d(c <-chan int32) {
+	// amd64:"CMPL\t.+, [$]256"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
+	for x := <-c; x < 0 || x >= 256; x = <-c {
+	}
+}
+
+func si3d(c <-chan int16) {
+	// amd64:"CMPW\t.+, [$]256"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
+	for x := <-c; x < 0 || x >= 256; x = <-c {
+	}
+}
+
+func si4d(c <-chan int8) {
+	// amd64:"CMPB\t.+, [$]10"
+	// s390x:"CLIJ\t[$]10, R[0-9]+, [$]10"
+	for x := <-c; x < 0 || x >= 10; x = <-c {
+	}
+}
+
+func si5d(c <-chan int64) {
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x >= 256 || x <= 4; x = <-c {
+	}
+}
+
+func si6d(c <-chan int32) {
+	// amd64:"CMPL\t.+, [$]255","DECL\t"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255","ADDW\t[$]-1,"
+	for x := <-c; x <= 0 || x > 256; x = <-c {
+	}
+}
+
+func si7d(c <-chan int16) {
+	// amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]60","ADDW\t[$]10,"
+	for x := <-c; x < -10 || x > 50; x = <-c {
+	}
+}
+
+func si8d(c <-chan int8) {
+	// amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
+	// s390x:"CLIJ\t[$]10, R[0-9]+, [$]126","ADDW\t[$]126,"
+	for x := <-c; x < -126 || x >= 0; x = <-c {
+	}
+}
+
+// ------------------------------------ //
+// unsigned integer range (conjunction) //
+// ------------------------------------ //
+
+func ui1c(c <-chan uint64) {
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x < 256 && x > 4; x = <-c {
+	}
+}
+
+func ui2c(c <-chan uint32) {
+	// amd64:"CMPL\t.+, [$]255","DECL\t"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
+	for x := <-c; x > 0 && x <= 256; x = <-c {
+	}
+}
+
+func ui3c(c <-chan uint16) {
+	// amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
+	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]40","ADDW\t[$]-10,"
+	for x := <-c; x >= 10 && x <= 50; x = <-c {
+	}
+}
+
+func ui4c(c <-chan uint8) {
+	// amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
+	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]2","ADDW\t[$]-126,"
+	for x := <-c; x >= 126 && x < 128; x = <-c {
+	}
+}
+
+// ------------------------------------ //
+// unsigned integer range (disjunction) //
+// ------------------------------------ //
+
+func ui1d(c <-chan uint64) {
+	// amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+	// s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
+	for x := <-c; x >= 256 || x <= 4; x = <-c {
+	}
+}
+
+func ui2d(c <-chan uint32) {
+	// amd64:"CMPL\t.+, [$]254","ADDL\t[$]-2,"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]254","ADDW\t[$]-2,"
+	for x := <-c; x <= 1 || x > 256; x = <-c {
+	}
+}
+
+func ui3d(c <-chan uint16) {
+	// amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
+	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]40","ADDW\t[$]-10,"
+	for x := <-c; x < 10 || x > 50; x = <-c {
+	}
+}
+
+func ui4d(c <-chan uint8) {
+	// amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
+	// s390x:"CLIJ\t[$]10, R[0-9]+, [$]2","ADDW\t[$]-126,"
+	for x := <-c; x < 126 || x >= 128; x = <-c {
+	}
+}
diff --git a/test/codegen/issue22703.go b/test/codegen/issue22703.go
new file mode 100644
index 0000000..0201de6
--- /dev/null
+++ b/test/codegen/issue22703.go
@@ -0,0 +1,535 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type I interface {
+	foo000()
+	foo001()
+	foo002()
+	foo003()
+	foo004()
+	foo005()
+	foo006()
+	foo007()
+	foo008()
+	foo009()
+	foo010()
+	foo011()
+	foo012()
+	foo013()
+	foo014()
+	foo015()
+	foo016()
+	foo017()
+	foo018()
+	foo019()
+	foo020()
+	foo021()
+	foo022()
+	foo023()
+	foo024()
+	foo025()
+	foo026()
+	foo027()
+	foo028()
+	foo029()
+	foo030()
+	foo031()
+	foo032()
+	foo033()
+	foo034()
+	foo035()
+	foo036()
+	foo037()
+	foo038()
+	foo039()
+	foo040()
+	foo041()
+	foo042()
+	foo043()
+	foo044()
+	foo045()
+	foo046()
+	foo047()
+	foo048()
+	foo049()
+	foo050()
+	foo051()
+	foo052()
+	foo053()
+	foo054()
+	foo055()
+	foo056()
+	foo057()
+	foo058()
+	foo059()
+	foo060()
+	foo061()
+	foo062()
+	foo063()
+	foo064()
+	foo065()
+	foo066()
+	foo067()
+	foo068()
+	foo069()
+	foo070()
+	foo071()
+	foo072()
+	foo073()
+	foo074()
+	foo075()
+	foo076()
+	foo077()
+	foo078()
+	foo079()
+	foo080()
+	foo081()
+	foo082()
+	foo083()
+	foo084()
+	foo085()
+	foo086()
+	foo087()
+	foo088()
+	foo089()
+	foo090()
+	foo091()
+	foo092()
+	foo093()
+	foo094()
+	foo095()
+	foo096()
+	foo097()
+	foo098()
+	foo099()
+	foo100()
+	foo101()
+	foo102()
+	foo103()
+	foo104()
+	foo105()
+	foo106()
+	foo107()
+	foo108()
+	foo109()
+	foo110()
+	foo111()
+	foo112()
+	foo113()
+	foo114()
+	foo115()
+	foo116()
+	foo117()
+	foo118()
+	foo119()
+	foo120()
+	foo121()
+	foo122()
+	foo123()
+	foo124()
+	foo125()
+	foo126()
+	foo127()
+	foo128()
+	foo129()
+	foo130()
+	foo131()
+	foo132()
+	foo133()
+	foo134()
+	foo135()
+	foo136()
+	foo137()
+	foo138()
+	foo139()
+	foo140()
+	foo141()
+	foo142()
+	foo143()
+	foo144()
+	foo145()
+	foo146()
+	foo147()
+	foo148()
+	foo149()
+	foo150()
+	foo151()
+	foo152()
+	foo153()
+	foo154()
+	foo155()
+	foo156()
+	foo157()
+	foo158()
+	foo159()
+	foo160()
+	foo161()
+	foo162()
+	foo163()
+	foo164()
+	foo165()
+	foo166()
+	foo167()
+	foo168()
+	foo169()
+	foo170()
+	foo171()
+	foo172()
+	foo173()
+	foo174()
+	foo175()
+	foo176()
+	foo177()
+	foo178()
+	foo179()
+	foo180()
+	foo181()
+	foo182()
+	foo183()
+	foo184()
+	foo185()
+	foo186()
+	foo187()
+	foo188()
+	foo189()
+	foo190()
+	foo191()
+	foo192()
+	foo193()
+	foo194()
+	foo195()
+	foo196()
+	foo197()
+	foo198()
+	foo199()
+	foo200()
+	foo201()
+	foo202()
+	foo203()
+	foo204()
+	foo205()
+	foo206()
+	foo207()
+	foo208()
+	foo209()
+	foo210()
+	foo211()
+	foo212()
+	foo213()
+	foo214()
+	foo215()
+	foo216()
+	foo217()
+	foo218()
+	foo219()
+	foo220()
+	foo221()
+	foo222()
+	foo223()
+	foo224()
+	foo225()
+	foo226()
+	foo227()
+	foo228()
+	foo229()
+	foo230()
+	foo231()
+	foo232()
+	foo233()
+	foo234()
+	foo235()
+	foo236()
+	foo237()
+	foo238()
+	foo239()
+	foo240()
+	foo241()
+	foo242()
+	foo243()
+	foo244()
+	foo245()
+	foo246()
+	foo247()
+	foo248()
+	foo249()
+	foo250()
+	foo251()
+	foo252()
+	foo253()
+	foo254()
+	foo255()
+	foo256()
+	foo257()
+	foo258()
+	foo259()
+	foo260()
+	foo261()
+	foo262()
+	foo263()
+	foo264()
+	foo265()
+	foo266()
+	foo267()
+	foo268()
+	foo269()
+	foo270()
+	foo271()
+	foo272()
+	foo273()
+	foo274()
+	foo275()
+	foo276()
+	foo277()
+	foo278()
+	foo279()
+	foo280()
+	foo281()
+	foo282()
+	foo283()
+	foo284()
+	foo285()
+	foo286()
+	foo287()
+	foo288()
+	foo289()
+	foo290()
+	foo291()
+	foo292()
+	foo293()
+	foo294()
+	foo295()
+	foo296()
+	foo297()
+	foo298()
+	foo299()
+	foo300()
+	foo301()
+	foo302()
+	foo303()
+	foo304()
+	foo305()
+	foo306()
+	foo307()
+	foo308()
+	foo309()
+	foo310()
+	foo311()
+	foo312()
+	foo313()
+	foo314()
+	foo315()
+	foo316()
+	foo317()
+	foo318()
+	foo319()
+	foo320()
+	foo321()
+	foo322()
+	foo323()
+	foo324()
+	foo325()
+	foo326()
+	foo327()
+	foo328()
+	foo329()
+	foo330()
+	foo331()
+	foo332()
+	foo333()
+	foo334()
+	foo335()
+	foo336()
+	foo337()
+	foo338()
+	foo339()
+	foo340()
+	foo341()
+	foo342()
+	foo343()
+	foo344()
+	foo345()
+	foo346()
+	foo347()
+	foo348()
+	foo349()
+	foo350()
+	foo351()
+	foo352()
+	foo353()
+	foo354()
+	foo355()
+	foo356()
+	foo357()
+	foo358()
+	foo359()
+	foo360()
+	foo361()
+	foo362()
+	foo363()
+	foo364()
+	foo365()
+	foo366()
+	foo367()
+	foo368()
+	foo369()
+	foo370()
+	foo371()
+	foo372()
+	foo373()
+	foo374()
+	foo375()
+	foo376()
+	foo377()
+	foo378()
+	foo379()
+	foo380()
+	foo381()
+	foo382()
+	foo383()
+	foo384()
+	foo385()
+	foo386()
+	foo387()
+	foo388()
+	foo389()
+	foo390()
+	foo391()
+	foo392()
+	foo393()
+	foo394()
+	foo395()
+	foo396()
+	foo397()
+	foo398()
+	foo399()
+	foo400()
+	foo401()
+	foo402()
+	foo403()
+	foo404()
+	foo405()
+	foo406()
+	foo407()
+	foo408()
+	foo409()
+	foo410()
+	foo411()
+	foo412()
+	foo413()
+	foo414()
+	foo415()
+	foo416()
+	foo417()
+	foo418()
+	foo419()
+	foo420()
+	foo421()
+	foo422()
+	foo423()
+	foo424()
+	foo425()
+	foo426()
+	foo427()
+	foo428()
+	foo429()
+	foo430()
+	foo431()
+	foo432()
+	foo433()
+	foo434()
+	foo435()
+	foo436()
+	foo437()
+	foo438()
+	foo439()
+	foo440()
+	foo441()
+	foo442()
+	foo443()
+	foo444()
+	foo445()
+	foo446()
+	foo447()
+	foo448()
+	foo449()
+	foo450()
+	foo451()
+	foo452()
+	foo453()
+	foo454()
+	foo455()
+	foo456()
+	foo457()
+	foo458()
+	foo459()
+	foo460()
+	foo461()
+	foo462()
+	foo463()
+	foo464()
+	foo465()
+	foo466()
+	foo467()
+	foo468()
+	foo469()
+	foo470()
+	foo471()
+	foo472()
+	foo473()
+	foo474()
+	foo475()
+	foo476()
+	foo477()
+	foo478()
+	foo479()
+	foo480()
+	foo481()
+	foo482()
+	foo483()
+	foo484()
+	foo485()
+	foo486()
+	foo487()
+	foo488()
+	foo489()
+	foo490()
+	foo491()
+	foo492()
+	foo493()
+	foo494()
+	foo495()
+	foo496()
+	foo497()
+	foo498()
+	foo499()
+	foo500()
+	foo501()
+	foo502()
+	foo503()
+	foo504()
+	foo505()
+	foo506()
+	foo507()
+	foo508()
+	foo509()
+	foo510()
+	foo511()
+}
+
+// Nil checks before calling interface methods.
+// We need them only when the offset is large.
+
+func callMethodSmallOffset(i I) {
+	// amd64:-"TESTB"
+	i.foo001()
+}
+
+func callMethodLargeOffset(i I) {
+	// amd64:"TESTB"
+	i.foo511()
+}
diff --git a/test/codegen/issue25378.go b/test/codegen/issue25378.go
new file mode 100644
index 0000000..810a022
--- /dev/null
+++ b/test/codegen/issue25378.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var wsp = [256]bool{
+	' ':  true,
+	'\t': true,
+	'\n': true,
+	'\r': true,
+}
+
+func zeroExtArgByte(ch [2]byte) bool {
+	return wsp[ch[0]] // amd64:-"MOVBLZX\t..,.."
+}
+
+func zeroExtArgUint16(ch [2]uint16) bool {
+	return wsp[ch[0]] // amd64:-"MOVWLZX\t..,.."
+}
diff --git a/test/codegen/issue31618.go b/test/codegen/issue31618.go
new file mode 100644
index 0000000..8effe29
--- /dev/null
+++ b/test/codegen/issue31618.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Make sure we remove both inline marks in the following code.
+// Both +5 and +6 should map to real instructions, which can
+// be used as inline marks instead of explicit nops.
+func f(x int) int {
+	// amd64:-"XCHGL"
+	x = g(x) + 5
+	// amd64:-"XCHGL"
+	x = g(x) + 6
+	return x
+}
+
+func g(x int) int {
+	return x >> 3
+}
diff --git a/test/codegen/issue33580.go b/test/codegen/issue33580.go
new file mode 100644
index 0000000..1ded944
--- /dev/null
+++ b/test/codegen/issue33580.go
@@ -0,0 +1,25 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure we reuse large constant loads, if we can.
+// See issue 33580.
+
+package codegen
+
+const (
+	A = 7777777777777777
+	B = 8888888888888888
+)
+
+func f(x, y uint64) uint64 {
+	p := x & A
+	q := y & A
+	r := x & B
+	// amd64:-"MOVQ.*8888888888888888"
+	s := y & B
+
+	return p * q * r * s
+}
diff --git a/test/codegen/issue38554.go b/test/codegen/issue38554.go
new file mode 100644
index 0000000..84db847
--- /dev/null
+++ b/test/codegen/issue38554.go
@@ -0,0 +1,15 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that we are zeroing directly instead of
+// copying a large zero value. Issue 38554.
+
+package codegen
+
+func retlarge() [256]byte {
+	// amd64:-"DUFFCOPY"
+	return [256]byte{}
+}
diff --git a/test/codegen/issue42610.go b/test/codegen/issue42610.go
new file mode 100644
index 0000000..c7eeddc
--- /dev/null
+++ b/test/codegen/issue42610.go
@@ -0,0 +1,30 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Don't allow 0 masks in shift lowering rules on ppc64x.
+// See issue 42610.
+
+package codegen
+
+func f32(a []int32, i uint32) {
+	g := func(p int32) int32 {
+		i = uint32(p) * (uint32(p) & (i & 1))
+		return 1
+	}
+	// ppc64le: -"RLWNIM"
+	// ppc64: -"RLWNIM"
+	a[0] = g(8) >> 1
+}
+
+func f(a []int, i uint) {
+	g := func(p int) int {
+		i = uint(p) * (uint(p) & (i & 1))
+		return 1
+	}
+	// ppc64le: -"RLDIC"
+	// ppc64: -"RLDIC"
+	a[0] = g(8) >> 1
+}
diff --git a/test/codegen/logic.go b/test/codegen/logic.go
new file mode 100644
index 0000000..9afdfd7
--- /dev/null
+++ b/test/codegen/logic.go
@@ -0,0 +1,24 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var gx, gy int
+
+// Test to make sure that (CMPQ (ANDQ x y) [0]) does not get rewritten to
+// (TESTQ x y) if the ANDQ has other uses. If that rewrite happens, then one
+// of the args of the ANDQ needs to be saved so it can be used as the arg to TESTQ.
+func andWithUse(x, y int) int {
+	// Store x and y to globals first so both are loaded into registers here; those MOVQs then cannot appear at the z := x & y line.
+	gx, gy = x, y
+	// amd64:-"MOVQ"
+	z := x & y
+	if z == 0 {
+		return 77
+	}
+	// Return z so that the AND result has a use besides the comparison with 0.
+	return z
+}
diff --git a/test/codegen/mapaccess.go b/test/codegen/mapaccess.go
new file mode 100644
index 0000000..a914a0c
--- /dev/null
+++ b/test/codegen/mapaccess.go
@@ -0,0 +1,486 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// These tests check that mapaccess calls are not used.
+// Issues #23661 and #24364.
+
+func mapCompoundAssignmentInt8() {
+	m := make(map[int8]int8, 0)
+	var k int8 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 45
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] |= 78
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] ^= 89
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] <<= 9
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] >>= 10
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentInt32() {
+	m := make(map[int32]int32, 0)
+	var k int32 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67890
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 456
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] |= 78
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] ^= 89
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] <<= 9
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] >>= 10
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentInt64() {
+	m := make(map[int64]int64, 0)
+	var k int64 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67890
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 456
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] |= 78
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] ^= 89
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] <<= 9
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] >>= 10
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentComplex128() {
+	m := make(map[complex128]complex128, 0)
+	var k complex128 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += 67890
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] -= 123
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] *= 456
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]++
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k]--
+}
+
+func mapCompoundAssignmentString() {
+	m := make(map[string]string, 0)
+	var k string = "key"
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] += "value"
+}
+
+var sinkAppend bool
+
+// TODO: optimization is not applied because of mapslow flag.
+func mapAppendAssignmentInt8() {
+	m := make(map[int8][]int8, 0)
+	var k int8 = 0
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []int8{7, 8, 9, 0}
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentInt32() {
+	m := make(map[int32][]int32, 0)
+	var k int32 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []int32{7, 8, 9, 0}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k+1] = append(m[k+1], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[-k] = append(m[-k], a...)
+
+	// Exceptions: the forms below are still expected to call mapaccess.
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentInt64() {
+	m := make(map[int64][]int64, 0)
+	var k int64 = 0
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []int64{7, 8, 9, 0}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k+1] = append(m[k+1], a...)
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[-k] = append(m[-k], a...)
+
+	// Exceptions: the forms below are still expected to call mapaccess.
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+// TODO: optimization is not applied because of mapslow flag.
+func mapAppendAssignmentComplex128() {
+	m := make(map[complex128][]complex128, 0)
+	var k complex128 = 0
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k], 1)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k], 1, 2, 3)
+
+	a := []complex128{7, 8, 9, 0}
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// Exceptions
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentString() {
+	m := make(map[string][]string, 0)
+	var k string = "key"
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], "1")
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], "1", "2", "3")
+
+	a := []string{"7", "8", "9", "0"}
+
+	// 386:-".*mapaccess"
+	// amd64:-".*mapaccess"
+	// arm:-".*mapaccess"
+	// arm64:-".*mapaccess"
+	m[k] = append(m[k], a...)
+
+	// Exceptions: the forms below are still expected to call mapaccess.
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(a, m[k]...)
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	sinkAppend, m[k] = !sinkAppend, append(m[k], "99")
+
+	// 386:".*mapaccess"
+	// amd64:".*mapaccess"
+	// arm:".*mapaccess"
+	// arm64:".*mapaccess"
+	m[k] = append(m[k+"1"], "100")
+}
diff --git a/test/codegen/maps.go b/test/codegen/maps.go
new file mode 100644
index 0000000..dcb4a93
--- /dev/null
+++ b/test/codegen/maps.go
@@ -0,0 +1,124 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// map types.
+
+// ------------------- //
+//     Access Const    //
+// ------------------- //
+
+// Direct use of constants in fast map access calls (Issue #19015).
+
+func AccessInt1(m map[int]int) int {
+	// amd64:"MOV[LQ]\t[$]5"
+	return m[5]
+}
+
+func AccessInt2(m map[int]int) bool {
+	// amd64:"MOV[LQ]\t[$]5"
+	_, ok := m[5]
+	return ok
+}
+
+func AccessString1(m map[string]int) int {
+	// amd64:`.*"abc"`
+	return m["abc"]
+}
+
+func AccessString2(m map[string]int) bool {
+	// amd64:`.*"abc"`
+	_, ok := m["abc"]
+	return ok
+}
+
+// ------------------- //
+//  String Conversion  //
+// ------------------- //
+
+func LookupStringConversionSimple(m map[string]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[string(bytes)]
+}
+
+func LookupStringConversionStructLit(m map[struct{ string }]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[struct{ string }{string(bytes)}]
+}
+
+func LookupStringConversionArrayLit(m map[[2]string]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[[2]string{string(bytes), string(bytes)}]
+}
+
+func LookupStringConversionNestedLit(m map[[1]struct{ s [1]string }]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[[1]struct{ s [1]string }{struct{ s [1]string }{s: [1]string{string(bytes)}}}]
+}
+
+func LookupStringConversionKeyedArrayLit(m map[[2]string]int, bytes []byte) int {
+	// amd64:-`.*runtime\.slicebytetostring\(`
+	return m[[2]string{0: string(bytes)}]
+}
+
+// ------------------- //
+//     Map Clear       //
+// ------------------- //
+
+// Optimization of map clear idiom (Issue #20138).
+
+func MapClearReflexive(m map[int]int) {
+	// amd64:`.*runtime\.mapclear`
+	// amd64:-`.*runtime\.mapiterinit`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+func MapClearIndirect(m map[int]int) {
+	s := struct{ m map[int]int }{m: m}
+	// amd64:`.*runtime\.mapclear`
+	// amd64:-`.*runtime\.mapiterinit`
+	for k := range s.m {
+		delete(s.m, k)
+	}
+}
+
+func MapClearPointer(m map[*byte]int) {
+	// amd64:`.*runtime\.mapclear`
+	// amd64:-`.*runtime\.mapiterinit`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+func MapClearNotReflexive(m map[float64]int) {
+	// amd64:`.*runtime\.mapiterinit`
+	// amd64:-`.*runtime\.mapclear`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+func MapClearInterface(m map[interface{}]int) {
+	// amd64:`.*runtime\.mapiterinit`
+	// amd64:-`.*runtime\.mapclear`
+	for k := range m {
+		delete(m, k)
+	}
+}
+
+func MapClearSideEffect(m map[int]int) int {
+	k := 0
+	// amd64:`.*runtime\.mapiterinit`
+	// amd64:-`.*runtime\.mapclear`
+	for k = range m {
+		delete(m, k)
+	}
+	return k
+}
diff --git a/test/codegen/math.go b/test/codegen/math.go
new file mode 100644
index 0000000..04cb4e5
--- /dev/null
+++ b/test/codegen/math.go
@@ -0,0 +1,228 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math"
+
+var sink64 [8]float64
+
+func approx(x float64) {
+	// s390x:"FIDBR\t[$]6"
+	// arm64:"FRINTPD"
+	// ppc64:"FRIP"
+	// ppc64le:"FRIP"
+	// wasm:"F64Ceil"
+	sink64[0] = math.Ceil(x)
+
+	// s390x:"FIDBR\t[$]7"
+	// arm64:"FRINTMD"
+	// ppc64:"FRIM"
+	// ppc64le:"FRIM"
+	// wasm:"F64Floor"
+	sink64[1] = math.Floor(x)
+
+	// s390x:"FIDBR\t[$]1"
+	// arm64:"FRINTAD"
+	// ppc64:"FRIN"
+	// ppc64le:"FRIN"
+	sink64[2] = math.Round(x)
+
+	// s390x:"FIDBR\t[$]5"
+	// arm64:"FRINTZD"
+	// ppc64:"FRIZ"
+	// ppc64le:"FRIZ"
+	// wasm:"F64Trunc"
+	sink64[3] = math.Trunc(x)
+
+	// s390x:"FIDBR\t[$]4"
+	// arm64:"FRINTND"
+	// wasm:"F64Nearest"
+	sink64[4] = math.RoundToEven(x)
+}
+
+func sqrt(x float64) float64 {
+	// amd64:"SQRTSD"
+	// 386/sse2:"SQRTSD" 386/softfloat:-"SQRTD"
+	// arm64:"FSQRTD"
+	// arm/7:"SQRTD"
+	// mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD"
+	// mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD"
+	// wasm:"F64Sqrt"
+	return math.Sqrt(x)
+}
+
+func sqrt32(x float32) float32 {
+	// amd64:"SQRTSS"
+	// 386/sse2:"SQRTSS" 386/softfloat:-"SQRTS"
+	// arm64:"FSQRTS"
+	// arm/7:"SQRTF"
+	// mips/hardfloat:"SQRTF" mips/softfloat:-"SQRTF"
+	// mips64/hardfloat:"SQRTF" mips64/softfloat:-"SQRTF"
+	// wasm:"F32Sqrt"
+	return float32(math.Sqrt(float64(x)))
+}
+
+// Check that math.Abs is lowered to an integer bit operation on amd64 and to a dedicated instruction elsewhere.
+func abs(x, y float64) {
+	// amd64:"BTRQ\t[$]63"
+	// arm64:"FABSD\t"
+	// s390x:"LPDFR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64:"FABS\t"
+	// ppc64le:"FABS\t"
+	// wasm:"F64Abs"
+	// arm/6:"ABSD\t"
+	sink64[0] = math.Abs(x)
+
+	// amd64:"BTRQ\t[$]63","PXOR"    (TODO: this should be BTSQ)
+	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64:"FNABS\t"
+	// ppc64le:"FNABS\t"
+	sink64[1] = -math.Abs(y)
+}
+
+// Check that the absolute value of a float32 is computed without a float64 round trip (s390x only).
+func abs32(x float32) float32 {
+	// s390x:"LPDFR",-"LDEBR",-"LEDBR"     (no float64 conversion)
+	return float32(math.Abs(float64(x)))
+}
+
+// Check that math.Copysign is lowered to the bit operations or dedicated instructions listed per architecture.
+func copysign(a, b, c float64) {
+	// amd64:"BTRQ\t[$]63","ANDQ","ORQ"
+	// s390x:"CPSDR",-"MOVD"         (no integer load/store)
+	// ppc64:"FCPSGN"
+	// ppc64le:"FCPSGN"
+	// wasm:"F64Copysign"
+	sink64[0] = math.Copysign(a, b)
+
+	// amd64:"BTSQ\t[$]63"
+	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64:"FCPSGN"
+	// ppc64le:"FCPSGN"
+	// arm64:"ORR", -"AND"
+	sink64[1] = math.Copysign(c, -1)
+
+	// Like math.Copysign(c, -1), but with integer operations. Useful
+	// for platforms that have a copysign opcode to see if it's detected.
+	// s390x:"LNDFR\t",-"MOVD\t"     (no integer load/store)
+	sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63)
+
+	// amd64:"ANDQ","ORQ"
+	// s390x:"CPSDR\t",-"MOVD\t"     (no integer load/store)
+	// ppc64:"FCPSGN"
+	// ppc64le:"FCPSGN"
+	sink64[3] = math.Copysign(-1, c)
+}
+
+func fma(x, y, z float64) float64 {
+	// amd64:"VFMADD231SD"
+	// arm/6:"FMULAD"
+	// arm64:"FMADDD"
+	// s390x:"FMADD"
+	// ppc64:"FMADD"
+	// ppc64le:"FMADD"
+	return math.FMA(x, y, z)
+}
+
+func fromFloat64(f64 float64) uint64 {
+	// amd64:"MOVQ\tX.*, [^X].*"
+	// arm64:"FMOVD\tF.*, R.*"
+	// ppc64:"MFVSRD"
+	// ppc64le:"MFVSRD"
+	return math.Float64bits(f64+1) + 1
+}
+
+func fromFloat32(f32 float32) uint32 {
+	// amd64:"MOVL\tX.*, [^X].*"
+	// arm64:"FMOVS\tF.*, R.*"
+	return math.Float32bits(f32+1) + 1
+}
+
+func toFloat64(u64 uint64) float64 {
+	// amd64:"MOVQ\t[^X].*, X.*"
+	// arm64:"FMOVD\tR.*, F.*"
+	// ppc64:"MTVSRD"
+	// ppc64le:"MTVSRD"
+	return math.Float64frombits(u64+1) + 1
+}
+
+func toFloat32(u32 uint32) float32 {
+	// amd64:"MOVL\t[^X].*, X.*"
+	// arm64:"FMOVS\tR.*, F.*"
+	return math.Float32frombits(u32+1) + 1
+}
+
+// Test that comparisons with constants converted to float
+// are evaluated at compile-time
+
+func constantCheck64() bool {
+	// amd64:"(MOVB\t[$]0)|(XORL\t[A-Z][A-Z0-9]+, [A-Z][A-Z0-9]+)",-"FCMP",-"MOVB\t[$]1"
+	// s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1,"
+	return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63))
+}
+
+func constantCheck32() bool {
+	// amd64:"MOV(B|L)\t[$]1",-"FCMP",-"MOV(B|L)\t[$]0"
+	// s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0,"
+	return float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31))
+}
+
+// Test that integer constants are converted to floating point constants
+// at compile-time
+
+func constantConvert32(x float32) float32 {
+	// amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)"
+	// s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+	// ppc64:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+	// ppc64le:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+	// arm64:"FMOVS\t[$]\\(1.0\\)"
+	if x > math.Float32frombits(0x3f800000) {
+		return -x
+	}
+	return x
+}
+
+func constantConvertInt32(x uint32) uint32 {
+	// amd64:-"MOVSS"
+	// s390x:-"FMOVS"
+	// ppc64:-"FMOVS"
+	// ppc64le:-"FMOVS"
+	// arm64:-"FMOVS"
+	if x > math.Float32bits(1) {
+		return -x
+	}
+	return x
+}
+
+func nanGenerate64() float64 {
+	// Test to make sure we don't generate a NaN while constant propagating.
+	// See issue 36400.
+	zero := 0.0
+	// amd64:-"DIVSD"
+	inf := 1 / zero // +inf. We can constant propagate this one.
+	negone := -1.0
+
+	// amd64:"DIVSD"
+	z0 := zero / zero
+	// amd64:"MULSD"
+	z1 := zero * inf
+	// amd64:"SQRTSD"
+	z2 := math.Sqrt(negone)
+	return z0 + z1 + z2
+}
+
+func nanGenerate32() float32 {
+	zero := float32(0.0)
+	// amd64:-"DIVSS"
+	inf := 1 / zero // +inf. We can constant propagate this one.
+
+	// amd64:"DIVSS"
+	z0 := zero / zero
+	// amd64:"MULSS"
+	z1 := zero * inf
+	return z0 + z1
+}
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
new file mode 100644
index 0000000..03012ef
--- /dev/null
+++ b/test/codegen/mathbits.go
@@ -0,0 +1,738 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+// ----------------------- //
+//    bits.LeadingZeros    //
+// ----------------------- //
+
+func LeadingZeros(n uint) int {
+	// amd64:"BSRQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.LeadingZeros(n)
+}
+
+func LeadingZeros64(n uint64) int {
+	// amd64:"BSRQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.LeadingZeros64(n)
+}
+
+func LeadingZeros32(n uint32) int {
+	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZW"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.LeadingZeros32(n)
+}
+
+func LeadingZeros16(n uint16) int {
+	// amd64:"BSRL","LEAL",-"CMOVQEQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.LeadingZeros16(n)
+}
+
+func LeadingZeros8(n uint8) int {
+	// amd64:"BSRL","LEAL",-"CMOVQEQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.LeadingZeros8(n)
+}
+
+// --------------- //
+//    bits.Len*    //
+// --------------- //
+
+func Len(n uint) int {
+	// amd64:"BSRQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.Len(n)
+}
+
+func Len64(n uint64) int {
+	// amd64:"BSRQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	// ppc64le:"SUBC","CNTLZD"
+	// ppc64:"SUBC","CNTLZD"
+	return bits.Len64(n)
+}
+
+func SubFromLen64(n uint64) int {
+	// ppc64le:"CNTLZD",-"SUBC"
+	// ppc64:"CNTLZD",-"SUBC"
+	return 64 - bits.Len64(n)
+}
+
+func Len32(n uint32) int {
+	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.Len32(n)
+}
+
+func Len16(n uint16) int {
+	// amd64:"BSRL","LEAL",-"CMOVQEQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.Len16(n)
+}
+
+func Len8(n uint8) int {
+	// amd64:"BSRL","LEAL",-"CMOVQEQ"
+	// s390x:"FLOGR"
+	// arm:"CLZ" arm64:"CLZ"
+	// mips:"CLZ"
+	// wasm:"I64Clz"
+	return bits.Len8(n)
+}
+
+// -------------------- //
+//    bits.OnesCount    //
+// -------------------- //
+
+// TODO(register args) Restore a m d 6 4 :.*x86HasPOPCNT when only one ABI is tested.
+func OnesCount(n uint) int {
+	// amd64:"POPCNTQ"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64:"POPCNTD"
+	// ppc64le:"POPCNTD"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount(n)
+}
+
+func OnesCount64(n uint64) int {
+	// amd64:"POPCNTQ"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64:"POPCNTD"
+	// ppc64le:"POPCNTD"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount64(n)
+}
+
+func OnesCount32(n uint32) int {
+	// amd64:"POPCNTL"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64:"POPCNTW"
+	// ppc64le:"POPCNTW"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount32(n)
+}
+
+func OnesCount16(n uint16) int {
+	// amd64:"POPCNTL"
+	// arm64:"VCNT","VUADDLV"
+	// s390x:"POPCNT"
+	// ppc64:"POPCNTW"
+	// ppc64le:"POPCNTW"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount16(n)
+}
+
+func OnesCount8(n uint8) int {
+	// s390x:"POPCNT"
+	// ppc64:"POPCNTB"
+	// ppc64le:"POPCNTB"
+	// wasm:"I64Popcnt"
+	return bits.OnesCount8(n)
+}
+
+// ----------------------- //
+//    bits.ReverseBytes    //
+// ----------------------- //
+
+func ReverseBytes(n uint) uint {
+	// amd64:"BSWAPQ"
+	// s390x:"MOVDBR"
+	// arm64:"REV"
+	return bits.ReverseBytes(n)
+}
+
+func ReverseBytes64(n uint64) uint64 {
+	// amd64:"BSWAPQ"
+	// s390x:"MOVDBR"
+	// arm64:"REV"
+	return bits.ReverseBytes64(n)
+}
+
+func ReverseBytes32(n uint32) uint32 {
+	// amd64:"BSWAPL"
+	// s390x:"MOVWBR"
+	// arm64:"REVW"
+	return bits.ReverseBytes32(n)
+}
+
+func ReverseBytes16(n uint16) uint16 {
+	// amd64:"ROLW"
+	// arm64:"REV16W",-"UBFX",-"ORR"
+	// arm/5:"SLL","SRL","ORR"
+	// arm/6:"REV16"
+	// arm/7:"REV16"
+	return bits.ReverseBytes16(n)
+}
+
+// --------------------- //
+//    bits.RotateLeft    //
+// --------------------- //
+
+func RotateLeft64(n uint64) uint64 {
+	// amd64:"ROLQ"
+	// arm64:"ROR"
+	// ppc64:"ROTL"
+	// ppc64le:"ROTL"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
+	// wasm:"I64Rotl"
+	return bits.RotateLeft64(n, 37)
+}
+
+func RotateLeft32(n uint32) uint32 {
+	// amd64:"ROLL" 386:"ROLL"
+	// arm:`MOVW\tR[0-9]+@>23`
+	// arm64:"RORW"
+	// ppc64:"ROTLW"
+	// ppc64le:"ROTLW"
+	// s390x:"RLL"
+	// wasm:"I32Rotl"
+	return bits.RotateLeft32(n, 9)
+}
+
+func RotateLeft16(n uint16) uint16 {
+	// amd64:"ROLW" 386:"ROLW"
+	return bits.RotateLeft16(n, 5)
+}
+
+func RotateLeft8(n uint8) uint8 {
+	// amd64:"ROLB" 386:"ROLB"
+	return bits.RotateLeft8(n, 5)
+}
+
+func RotateLeftVariable(n uint, m int) uint {
+	// amd64:"ROLQ"
+	// arm64:"ROR"
+	// ppc64:"ROTL"
+	// ppc64le:"ROTL"
+	// s390x:"RLLG"
+	// wasm:"I64Rotl"
+	return bits.RotateLeft(n, m)
+}
+
+func RotateLeftVariable64(n uint64, m int) uint64 {
+	// amd64:"ROLQ"
+	// arm64:"ROR"
+	// ppc64:"ROTL"
+	// ppc64le:"ROTL"
+	// s390x:"RLLG"
+	// wasm:"I64Rotl"
+	return bits.RotateLeft64(n, m)
+}
+
+func RotateLeftVariable32(n uint32, m int) uint32 {
+	// arm:`MOVW\tR[0-9]+@>R[0-9]+`
+	// amd64:"ROLL"
+	// arm64:"RORW"
+	// ppc64:"ROTLW"
+	// ppc64le:"ROTLW"
+	// s390x:"RLL"
+	// wasm:"I32Rotl"
+	return bits.RotateLeft32(n, m)
+}
+
+// ------------------------ //
+//    bits.TrailingZeros    //
+// ------------------------ //
+
+func TrailingZeros(n uint) int {
+	// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+	// arm:"CLZ"
+	// arm64:"RBIT","CLZ"
+	// s390x:"FLOGR"
+	// ppc64/power8:"ANDN","POPCNTD"
+	// ppc64le/power8:"ANDN","POPCNTD"
+	// ppc64/power9: "CNTTZD"
+	// ppc64le/power9: "CNTTZD"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros(n)
+}
+
+func TrailingZeros64(n uint64) int {
+	// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+	// arm64:"RBIT","CLZ"
+	// s390x:"FLOGR"
+	// ppc64/power8:"ANDN","POPCNTD"
+	// ppc64le/power8:"ANDN","POPCNTD"
+	// ppc64/power9: "CNTTZD"
+	// ppc64le/power9: "CNTTZD"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros64(n)
+}
+
+func TrailingZeros64Subtract(n uint64) int {
+	// ppc64le/power8:"NEG","SUBC","ANDN","POPCNTD"
+	// ppc64le/power9:"SUBC","CNTTZD"
+	return bits.TrailingZeros64(1 - n)
+}
+
+func TrailingZeros32(n uint32) int {
+	// amd64:"BTSQ\\t\\$32","BSFQ"
+	// arm:"CLZ"
+	// arm64:"RBITW","CLZW"
+	// s390x:"FLOGR","MOVWZ"
+	// ppc64/power8:"ANDN","POPCNTW"
+	// ppc64le/power8:"ANDN","POPCNTW"
+	// ppc64/power9: "CNTTZW"
+	// ppc64le/power9: "CNTTZW"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros32(n)
+}
+
+func TrailingZeros16(n uint16) int {
+	// amd64:"BSFL","BTSL\\t\\$16"
+	// 386:"BSFL\t"
+	// arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
+	// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
+	// s390x:"FLOGR","OR\t\\$65536"
+	// ppc64/power8:"POPCNTD","OR\\t\\$65536"
+	// ppc64le/power8:"POPCNTD","OR\\t\\$65536"
+	// ppc64/power9:"CNTTZD","OR\\t\\$65536"
+	// ppc64le/power9:"CNTTZD","OR\\t\\$65536"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros16(n)
+}
+
+func TrailingZeros8(n uint8) int {
+	// amd64:"BSFL","BTSL\\t\\$8"
+	// arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
+	// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
+	// s390x:"FLOGR","OR\t\\$256"
+	// wasm:"I64Ctz"
+	return bits.TrailingZeros8(n)
+}
+
+// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
+
+func IterateBits(n uint) int {
+	i := 0
+	for n != 0 {
+		// amd64:"BSFQ",-"CMOVEQ"
+		i += bits.TrailingZeros(n)
+		n &= n - 1
+	}
+	return i
+}
+
+func IterateBits64(n uint64) int {
+	i := 0
+	for n != 0 {
+		// amd64:"BSFQ",-"CMOVEQ"
+		i += bits.TrailingZeros64(n)
+		n &= n - 1
+	}
+	return i
+}
+
+func IterateBits32(n uint32) int {
+	i := 0
+	for n != 0 {
+		// amd64:"BSFL",-"BTSQ"
+		i += bits.TrailingZeros32(n)
+		n &= n - 1
+	}
+	return i
+}
+
+func IterateBits16(n uint16) int {
+	i := 0
+	for n != 0 {
+		// amd64:"BSFL",-"BTSL"
+		// arm64:"RBITW","CLZW",-"ORR"
+		i += bits.TrailingZeros16(n)
+		n &= n - 1
+	}
+	return i
+}
+
+func IterateBits8(n uint8) int {
+	i := 0
+	for n != 0 {
+		// amd64:"BSFL",-"BTSL"
+		// arm64:"RBITW","CLZW",-"ORR"
+		i += bits.TrailingZeros8(n)
+		n &= n - 1
+	}
+	return i
+}
+
+// --------------- //
+//    bits.Add*    //
+// --------------- //
+
+func Add(x, y, ci uint) (r, co uint) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	return bits.Add(x, y, ci)
+}
+
+func AddC(x, ci uint) (r, co uint) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	return bits.Add(x, 7, ci)
+}
+
+func AddZ(x, y uint) (r, co uint) {
+	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+	// s390x:"ADDC",-"ADDC\t[$]-1,"
+	return bits.Add(x, y, 0)
+}
+
+func AddR(x, y, ci uint) uint {
+	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	r, _ := bits.Add(x, y, ci)
+	return r
+}
+
+func AddM(p, q, r *[3]uint) {
+	var c uint
+	r[0], c = bits.Add(p[0], q[0], c)
+	// arm64:"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+	// s390x:"ADDE",-"ADDC\t[$]-1,"
+	r[1], c = bits.Add(p[1], q[1], c)
+	r[2], c = bits.Add(p[2], q[2], c)
+}
+
+func Add64(x, y, ci uint64) (r, co uint64) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// ppc64: "ADDC", "ADDE", "ADDZE"
+	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	return bits.Add64(x, y, ci)
+}
+
+func Add64C(x, ci uint64) (r, co uint64) {
+	// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+	// ppc64: "ADDC", "ADDE", "ADDZE"
+	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	return bits.Add64(x, 7, ci)
+}
+
+func Add64Z(x, y uint64) (r, co uint64) {
+	// arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+	// ppc64: "ADDC", "ADDE", "ADDZE"
+	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDC",-"ADDC\t[$]-1,"
+	return bits.Add64(x, y, 0)
+}
+
+func Add64R(x, y, ci uint64) uint64 {
+	// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
+	// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+	// ppc64: "ADDC", "ADDE", "ADDZE"
+	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE","ADDC\t[$]-1,"
+	r, _ := bits.Add64(x, y, ci)
+	return r
+}
+func Add64M(p, q, r *[3]uint64) {
+	var c uint64
+	r[0], c = bits.Add64(p[0], q[0], c)
+	// arm64:"ADCS",-"ADD\t",-"CMP"
+	// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+	// ppc64: "ADDC", "ADDE", "ADDZE"
+	// ppc64le: "ADDC", "ADDE", "ADDZE"
+	// s390x:"ADDE",-"ADDC\t[$]-1,"
+	r[1], c = bits.Add64(p[1], q[1], c)
+	r[2], c = bits.Add64(p[2], q[2], c)
+}
+
+func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
+	r, c := bits.Add64(a, b, 0)
+	// s390x:"BRC\t[$]3,",-"ADDE"
+	if c == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Add64PanicOnOverflowNE(a, b uint64) uint64 {
+	r, c := bits.Add64(a, b, 0)
+	// s390x:"BRC\t[$]3,",-"ADDE"
+	if c != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Add64PanicOnOverflowGT(a, b uint64) uint64 {
+	r, c := bits.Add64(a, b, 0)
+	// s390x:"BRC\t[$]3,",-"ADDE"
+	if c > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Add64(a[0], b[0], c)
+	r[1], c = bits.Add64(a[1], b[1], c)
+	// s390x:"BRC\t[$]3,"
+	if c == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Add64(a[0], b[0], c)
+	r[1], c = bits.Add64(a[1], b[1], c)
+	// s390x:"BRC\t[$]3,"
+	if c != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Add64(a[0], b[0], c)
+	r[1], c = bits.Add64(a[1], b[1], c)
+	// s390x:"BRC\t[$]3,"
+	if c > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// --------------- //
+//    bits.Sub*    //
+// --------------- //
+
+func Sub(x, y, ci uint) (r, co uint) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	return bits.Sub(x, y, ci)
+}
+
+func SubC(x, ci uint) (r, co uint) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	return bits.Sub(x, 7, ci)
+}
+
+func SubZ(x, y uint) (r, co uint) {
+	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+	// s390x:"SUBC"
+	return bits.Sub(x, y, 0)
+}
+
+func SubR(x, y, ci uint) uint {
+	// amd64:"NEGL","SBBQ",-"NEGQ"
+	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	r, _ := bits.Sub(x, y, ci)
+	return r
+}
+func SubM(p, q, r *[3]uint) {
+	var c uint
+	r[0], c = bits.Sub(p[0], q[0], c)
+	// amd64:"SBBQ",-"NEGL",-"NEGQ"
+	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	r[1], c = bits.Sub(p[1], q[1], c)
+	r[2], c = bits.Sub(p[2], q[2], c)
+}
+
+func Sub64(x, y, ci uint64) (r, co uint64) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	return bits.Sub64(x, y, ci)
+}
+
+func Sub64C(x, ci uint64) (r, co uint64) {
+	// amd64:"NEGL","SBBQ","NEGQ"
+	// arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	return bits.Sub64(x, 7, ci)
+}
+
+func Sub64Z(x, y uint64) (r, co uint64) {
+	// amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+	// arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+	// s390x:"SUBC"
+	return bits.Sub64(x, y, 0)
+}
+
+func Sub64R(x, y, ci uint64) uint64 {
+	// amd64:"NEGL","SBBQ",-"NEGQ"
+	// arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	r, _ := bits.Sub64(x, y, ci)
+	return r
+}
+func Sub64M(p, q, r *[3]uint64) {
+	var c uint64
+	r[0], c = bits.Sub64(p[0], q[0], c)
+	// amd64:"SBBQ",-"NEGL",-"NEGQ"
+	// arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+	// s390x:"SUBE"
+	r[1], c = bits.Sub64(p[1], q[1], c)
+	r[2], c = bits.Sub64(p[2], q[2], c)
+}
+
+func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
+	r, b := bits.Sub64(a, b, 0)
+	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+	if b == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
+	r, b := bits.Sub64(a, b, 0)
+	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+	if b != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
+	r, b := bits.Sub64(a, b, 0)
+	// s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+	if b > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Sub64(a[0], b[0], c)
+	r[1], c = bits.Sub64(a[1], b[1], c)
+	// s390x:"BRC\t[$]12,"
+	if c == 1 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Sub64(a[0], b[0], c)
+	r[1], c = bits.Sub64(a[1], b[1], c)
+	// s390x:"BRC\t[$]12,"
+	if c != 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
+	var r [2]uint64
+	var c uint64
+	r[0], c = bits.Sub64(a[0], b[0], c)
+	r[1], c = bits.Sub64(a[1], b[1], c)
+	// s390x:"BRC\t[$]12,"
+	if c > 0 {
+		panic("overflow")
+	}
+	return r
+}
+
+// --------------- //
+//    bits.Mul*    //
+// --------------- //
+
+func Mul(x, y uint) (hi, lo uint) {
+	// amd64:"MULQ"
+	// arm64:"UMULH","MUL"
+	// ppc64:"MULHDU","MULLD"
+	// ppc64le:"MULHDU","MULLD"
+	// s390x:"MLGR"
+	// mips64: "MULVU"
+	return bits.Mul(x, y)
+}
+
+func Mul64(x, y uint64) (hi, lo uint64) {
+	// amd64:"MULQ"
+	// arm64:"UMULH","MUL"
+	// ppc64:"MULHDU","MULLD"
+	// ppc64le:"MULHDU","MULLD"
+	// s390x:"MLGR"
+	// mips64: "MULVU"
+	return bits.Mul64(x, y)
+}
+
+// --------------- //
+//    bits.Div*    //
+// --------------- //
+
+func Div(hi, lo, x uint) (q, r uint) {
+	// amd64:"DIVQ"
+	return bits.Div(hi, lo, x)
+}
+
+func Div32(hi, lo, x uint32) (q, r uint32) {
+	// arm64:"ORR","UDIV","MSUB",-"UREM"
+	return bits.Div32(hi, lo, x)
+}
+
+func Div64(hi, lo, x uint64) (q, r uint64) {
+	// amd64:"DIVQ"
+	return bits.Div64(hi, lo, x)
+}
+
+func Div64degenerate(x uint64) (q, r uint64) {
+	// amd64:-"DIVQ"
+	return bits.Div64(0, x, 5)
+}
diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go
new file mode 100644
index 0000000..d74dae0
--- /dev/null
+++ b/test/codegen/memcombine.go
@@ -0,0 +1,675 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import (
+	"encoding/binary"
+	"runtime"
+)
+
+var sink64 uint64
+var sink32 uint32
+var sink16 uint16
+
+// ------------- //
+//    Loading    //
+// ------------- //
+
+func load_le64(b []byte) {
+	// amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
+	// s390x:`MOVDBR\s\(.*\),`
+	// arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]Z`
+	sink64 = binary.LittleEndian.Uint64(b)
+}
+
+func load_le64_idx(b []byte, idx int) {
+	// amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
+	// s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
+	sink64 = binary.LittleEndian.Uint64(b[idx:])
+}
+
+func load_le32(b []byte) {
+	// amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
+	// 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
+	// s390x:`MOVWBR\s\(.*\),`
+	// arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
+	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
+	sink32 = binary.LittleEndian.Uint32(b)
+}
+
+func load_le32_idx(b []byte, idx int) {
+	// amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
+	// 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
+	// s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
+	// ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
+	sink32 = binary.LittleEndian.Uint32(b[idx:])
+}
+
+func load_le16(b []byte) {
+	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\s`,-`MOVBZ`
+	// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+	// s390x:`MOVHBR\s\(.*\),`
+	sink16 = binary.LittleEndian.Uint16(b)
+}
+
+func load_le16_idx(b []byte, idx int) {
+	// amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\s`,-`MOVBZ`
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+	// s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
+	sink16 = binary.LittleEndian.Uint16(b[idx:])
+}
+
+func load_be64(b []byte) {
+	// amd64:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+	// s390x:`MOVD\s\(.*\),`
+	// arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
+	sink64 = binary.BigEndian.Uint64(b)
+}
+
+func load_be64_idx(b []byte, idx int) {
+	// amd64:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+	// s390x:`MOVD\s\(.*\)\(.*\*1\),`
+	// arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
+	// ppc64le:`MOVDBR`,-`MOV[BHW]Z`
+	sink64 = binary.BigEndian.Uint64(b[idx:])
+}
+
+func load_be32(b []byte) {
+	// amd64:`BSWAPL`,-`MOV[BW]`,-`OR`
+	// s390x:`MOVWZ\s\(.*\),`
+	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
+	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
+	sink32 = binary.BigEndian.Uint32(b)
+}
+
+func load_be32_idx(b []byte, idx int) {
+	// amd64:`BSWAPL`,-`MOV[BW]`,-`OR`
+	// s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
+	// arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
+	// ppc64le:`MOVWBR`,-`MOV[BH]Z`
+	sink32 = binary.BigEndian.Uint32(b[idx:])
+}
+
+func load_be16(b []byte) {
+	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
+	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
+	sink16 = binary.BigEndian.Uint16(b)
+}
+
+func load_be16_idx(b []byte, idx int) {
+	// amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
+	// arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
+	sink16 = binary.BigEndian.Uint16(b[idx:])
+}
+
+func load_le_byte2_uint16(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
+	return uint16(s[0]) | uint16(s[1])<<8
+}
+
+func load_le_byte2_uint16_inv(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
+	return uint16(s[1])<<8 | uint16(s[0]) // little-endian 16-bit load with the OR operands swapped
+}
+
+func load_le_byte4_uint32(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	// 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+	// amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+	// ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z`
+	return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+}
+
+func load_le_byte4_uint32_inv(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0])
+}
+
+func load_le_byte8_uint64(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	// amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
+	// ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+	return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
+}
+
+func load_le_byte8_uint64_inv(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0])
+}
+
+func load_be_byte2_uint16(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
+	return uint16(s[0])<<8 | uint16(s[1])
+}
+
+func load_be_byte2_uint16_inv(s []byte) uint16 {
+	// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
+	// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
+	return uint16(s[1]) | uint16(s[0])<<8
+}
+
+func load_be_byte4_uint32(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
+	return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3])
+}
+
+func load_be_byte4_uint32_inv(s []byte) uint32 {
+	// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
+	// amd64:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
+	return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
+}
+
+func load_be_byte8_uint64(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
+	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+	return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7])
+}
+
+func load_be_byte8_uint64_inv(s []byte) uint64 {
+	// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
+	// amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+	// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+	return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
+}
+
+func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	return uint16(s[idx]) | uint16(s[idx+1])<<8
+}
+
+func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+	// 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	return uint16(s[idx+1])<<8 | uint16(s[idx])
+}
+
+func load_le_byte4_uint32_idx(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	// amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR`
+	return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24
+}
+
+func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx])
+}
+
+func load_le_byte8_uint64_idx(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	// amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR`
+	return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56
+}
+
+func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx])
+}
+
+func load_be_byte2_uint16_idx(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	return uint16(s[idx])<<8 | uint16(s[idx+1])
+}
+
+func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+	// amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+	return uint16(s[idx+1]) | uint16(s[idx])<<8
+}
+
+func load_be_byte4_uint32_idx(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
+	return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3])
+}
+
+func load_be_byte8_uint64_idx(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+	return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7])
+}
+
+func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+	return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
+}
+
+func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+	return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1])
+}
+
+func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
+}
+
+func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+	return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2])
+}
+
+func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
+}
+
+func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+	return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3])
+}
+
+func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
+	return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1])
+}
+
+func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
+	// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
+	return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8
+}
+
+func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 {
+	// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
+	return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])
+}
+
+func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
+	// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+	return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
+}
+
+// Check load combining across function calls.
+
+func fcall_byte(a [2]byte) [2]byte {
+	return fcall_byte(fcall_byte(a)) // amd64:`MOVW`
+}
+
+func fcall_uint16(a [2]uint16) [2]uint16 {
+	return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL`
+}
+
+func fcall_uint32(a [2]uint32) [2]uint32 {
+	return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ`
+}
+
+// We want to merge load+op in the first function, but not in the
+// second. See Issue 19595.
+func load_op_merge(p, q *int) {
+	x := *p // amd64:`ADDQ\t\(`
+	*q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724).
+}
+func load_op_no_merge(p, q *int) {
+	x := *p
+	for i := 0; i < 10; i++ {
+		*q += x // amd64:`ADDQ\t[A-Z]`
+	}
+}
+
+// Make sure offsets are folded into loads and stores.
+func offsets_fold(_, a [20]byte) (b [20]byte) {
+	// arm64:`MOVD\t""\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, ""\.b\+[0-9]+\(FP\)`
+	b = a
+	return
+}
+
+// Make sure we don't put pointers in SSE registers across safe
+// points.
+
+func safe_point(p, q *[2]*int) {
+	a, b := p[0], p[1] // amd64:-`MOVUPS`
+	runtime.GC()
+	q[0], q[1] = a, b // amd64:-`MOVUPS`
+}
+
+// ------------- //
+//    Storing    //
+// ------------- //
+
+func store_le64(b []byte) {
+	// amd64:`MOVQ\s.*\(.*\)$`,-`SHR.`
+	// arm64:`MOVD`,-`MOV[WBH]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
+	// s390x:`MOVDBR\s.*\(.*\)$`
+	binary.LittleEndian.PutUint64(b, sink64)
+}
+
+func store_le64_idx(b []byte, idx int) {
+	// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
+	// arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
+	// ppc64le:`MOVD\s`,-`MOV[BHW]\s`
+	// s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint64(b[idx:], sink64)
+}
+
+func store_le64_load(b []byte, x *[8]byte) {
+	_ = b[8] // up-front bounds check; b[8] in range covers the 8 bytes written below
+	// amd64:-`MOV[BWL]`
+	// arm64:-`MOV[BWH]`
+	// ppc64le:-`MOV[BWH]`
+	// s390x:-`MOVB`,-`MOV[WH]BR`
+	binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:]))
+}
+
+func store_le32(b []byte) {
+	// amd64:`MOVL\s`
+	// arm64:`MOVW`,-`MOV[BH]`
+	// ppc64le:`MOVW\s`
+	// s390x:`MOVWBR\s.*\(.*\)$`
+	binary.LittleEndian.PutUint32(b, sink32)
+}
+
+func store_le32_idx(b []byte, idx int) {
+	// amd64:`MOVL\s`
+	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
+	// ppc64le:`MOVW\s`
+	// s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint32(b[idx:], sink32)
+}
+
+func store_le16(b []byte) {
+	// amd64:`MOVW\s`
+	// arm64:`MOVH`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// s390x:`MOVHBR\s.*\(.*\)$`
+	binary.LittleEndian.PutUint16(b, sink16)
+}
+
+func store_le16_idx(b []byte, idx int) {
+	// amd64:`MOVW\s`
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	// ppc64le:`MOVH\s`
+	// s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint16(b[idx:], sink16)
+}
+
+func store_be64(b []byte) {
+	// amd64:`BSWAPQ`,-`SHR.`
+	// arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
+	// ppc64le:`MOVDBR`
+	// s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint64(b, sink64)
+}
+
+func store_be64_idx(b []byte, idx int) {
+	// amd64:`BSWAPQ`,-`SHR.`
+	// arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
+	// ppc64le:`MOVDBR`
+	// s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint64(b[idx:], sink64)
+}
+
+func store_be32(b []byte) {
+	// amd64:`BSWAPL`,-`SHR.`
+	// arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
+	// ppc64le:`MOVWBR`
+	// s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint32(b, sink32)
+}
+
+func store_be32_idx(b []byte, idx int) {
+	// amd64:`BSWAPL`,-`SHR.`
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
+	// ppc64le:`MOVWBR`
+	// s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint32(b[idx:], sink32)
+}
+
+func store_be16(b []byte) {
+	// amd64:`ROLW\s\$8`,-`SHR.`
+	// arm64:`MOVH`,`REV16W`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint16(b, sink16)
+}
+
+func store_be16_idx(b []byte, idx int) {
+	// amd64:`ROLW\s\$8`,-`SHR.`
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
+	// ppc64le:`MOVHBR`
+	// s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+	binary.BigEndian.PutUint16(b[idx:], sink16)
+}
+
+func store_le_byte_2(b []byte, val uint16) {
+	_ = b[2] // up-front bounds check of the highest index written; stores start at b[1], hence displacement 1 below
+	// arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	b[1], b[2] = byte(val), byte(val>>8)
+}
+
+func store_le_byte_2_inv(b []byte, val uint16) {
+	_ = b[2] // up-front bounds check of the highest index written; same bytes as store_le_byte_2, assigned in reverse order
+	// 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	b[2], b[1] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_4(b []byte, val uint32) {
+	_ = b[4] // up-front bounds check of the highest index written; stores cover b[1] through b[4]
+	// arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
+	// 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+	// amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+	b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24)
+}
+
+func store_le_byte_8(b []byte, val uint64) {
+	_ = b[8] // up-front bounds check of the highest index written; stores cover b[1] through b[8]
+	// arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`
+	// amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
+	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56)
+}
+
+func store_be_byte_2(b []byte, val uint16) {
+	_ = b[2] // up-front bounds check of the highest index written; big-endian stores cover b[1] and b[2]
+	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+	// amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+	b[1], b[2] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4(b []byte, val uint32) {
+	_ = b[4] // up-front bounds check of the highest index written; big-endian stores cover b[1] through b[4]
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
+	// amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+	b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_8(b []byte, val uint64) {
+	_ = b[8] // up-front bounds check of the highest index written; big-endian stores cover b[1] through b[8]
+	// arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW`
+	// amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
+	b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx(b []byte, idx int, val uint16) {
+	_, _ = b[idx+0], b[idx+1] // up-front bounds checks for both indices written below
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+	b[idx+1], b[idx+0] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
+	_, _ = b[idx+0], b[idx+1] // up-front bounds checks for both indices written below
+	// 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+	b[idx+0], b[idx+1] = byte(val), byte(val>>8)
+}
+
+func store_le_byte_4_idx(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3] // up-front bounds checks for all four indices written below
+	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`
+	b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_2_idx(b []byte, idx int, val uint16) {
+	_, _ = b[idx+0], b[idx+1] // up-front bounds checks for both indices written below
+	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	b[idx+0], b[idx+1] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4_idx(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3] // up-front bounds checks for all four indices written below
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
+	b[idx+0], b[idx+1], b[idx+2], b[idx+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_2_idx2(b []byte, idx int, val uint16) {
+	_, _ = b[(idx<<1)+0], b[(idx<<1)+1] // up-front bounds checks for both scaled indices written below
+	// arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+	b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx2(b []byte, idx int, val uint16) {
+	_, _ = b[(idx<<1)+0], b[(idx<<1)+1] // up-front bounds checks for both scaled indices written below
+	// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+	b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4_idx4(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] // up-front bounds checks for all four scaled indices written below
+	// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W`
+	b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) {
+	_, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] // up-front bounds checks for all four scaled indices written below
+	// arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`
+	b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+// ------------- //
+//    Zeroing    //
+// ------------- //
+
+// Check that zero stores are combined into larger stores.
+
+func zero_byte_2(b1, b2 []byte) {
+	// Up-front bounds checks guarantee the safety of both pairs of writes below.
+	_, _ = b1[1], b2[1]
+	// arm64:"MOVH\tZR",-"MOVB"
+	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+	b1[0], b1[1] = 0, 0
+	// arm64:"MOVH\tZR",-"MOVB"
+	// 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+	// amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
+	b2[1], b2[0] = 0, 0
+}
+
+func zero_byte_4(b1, b2 []byte) {
+	_, _ = b1[3], b2[3] // up-front bounds checks of the highest index zeroed in each slice
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+	b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0
+}
+
+func zero_byte_8(b []byte) {
+	_ = b[7] // up-front bounds check of the highest index zeroed below
+	b[0], b[1], b[2], b[3] = 0, 0, 0, 0
+	b[4], b[5], b[6], b[7] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_16(b []byte) {
+	_ = b[15] // up-front bounds check of the highest index zeroed below
+	b[0], b[1], b[2], b[3] = 0, 0, 0, 0
+	b[4], b[5], b[6], b[7] = 0, 0, 0, 0
+	b[8], b[9], b[10], b[11] = 0, 0, 0, 0
+	b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_30(a *[30]byte) {
+	*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_39(a *[39]byte) {
+	*a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_2_idx(b []byte, idx int) {
+	_, _ = b[idx+0], b[idx+1] // up-front bounds checks for both indices zeroed below
+	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+	b[idx+0], b[idx+1] = 0, 0
+}
+
+func zero_byte_2_idx2(b []byte, idx int) {
+	_, _ = b[(idx<<1)+0], b[(idx<<1)+1] // up-front bounds checks for both scaled indices zeroed below
+	// arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+	b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0
+}
+
+func zero_uint16_2(h1, h2 []uint16) {
+	_, _ = h1[1], h2[1] // up-front bounds checks of the highest index zeroed in each slice
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+	h1[0], h1[1] = 0, 0
+	// arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+	// amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+	// 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+	h2[1], h2[0] = 0, 0
+}
+
+func zero_uint16_4(h1, h2 []uint16) {
+	_, _ = h1[3], h2[3] // up-front bounds checks of the highest index zeroed in each slice
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+	h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0
+}
+
+func zero_uint16_8(h []uint16) {
+	_ = h[7] // up-front bounds check of the highest index zeroed below
+	h[0], h[1], h[2], h[3] = 0, 0, 0, 0
+	h[4], h[5], h[6], h[7] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
+
+func zero_uint32_2(w1, w2 []uint32) {
+	_, _ = w1[1], w2[1] // up-front bounds checks of the highest index zeroed in each slice
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+	w1[0], w1[1] = 0, 0
+	// arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+	// amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+	w2[1], w2[0] = 0, 0
+}
+
+func zero_uint32_4(w1, w2 []uint32) {
+	_, _ = w1[3], w2[3] // up-front bounds checks of the highest index zeroed in each slice
+	w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+	w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
+
+func zero_uint64_2(d1, d2 []uint64) {
+	_, _ = d1[1], d2[1] // up-front bounds checks of the highest index zeroed in each slice
+	d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+	d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
new file mode 100644
index 0000000..fb8208f
--- /dev/null
+++ b/test/codegen/memops.go
@@ -0,0 +1,367 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var x [2]bool
+var x8 [2]uint8
+var x16 [2]uint16
+var x32 [2]uint32
+var x64 [2]uint64
+
+func compMem1() int {
+	// amd64:`CMPB\t"".x\+1\(SB\), [$]0`
+	if x[1] {
+		return 1
+	}
+	// amd64:`CMPB\t"".x8\+1\(SB\), [$]7`
+	if x8[1] == 7 {
+		return 1
+	}
+	// amd64:`CMPW\t"".x16\+2\(SB\), [$]7`
+	if x16[1] == 7 {
+		return 1
+	}
+	// amd64:`CMPL\t"".x32\+4\(SB\), [$]7`
+	if x32[1] == 7 {
+		return 1
+	}
+	// amd64:`CMPQ\t"".x64\+8\(SB\), [$]7`
+	if x64[1] == 7 {
+		return 1
+	}
+	return 0
+}
+
+type T struct {
+	x   bool   // 1-byte field, expected to be tested with CMPB in compMem2
+	x8  uint8  // 1-byte field, expected to be tested with CMPB in compMem2
+	x16 uint16 // 2-byte field, expected to be tested with CMPW in compMem2
+	x32 uint32 // 4-byte field, expected to be tested with CMPL in compMem2
+	x64 uint64 // 8-byte field, expected to be tested with CMPQ in compMem2
+	a   [2]int // extra field that forces T to be passed in memory
+}
+
+func compMem2(t T) int {
+	// amd64:`CMPB\t.*\(SP\), [$]0`
+	if t.x {
+		return 1
+	}
+	// amd64:`CMPB\t.*\(SP\), [$]7`
+	if t.x8 == 7 {
+		return 1
+	}
+	// amd64:`CMPW\t.*\(SP\), [$]7`
+	if t.x16 == 7 {
+		return 1
+	}
+	// amd64:`CMPL\t.*\(SP\), [$]7`
+	if t.x32 == 7 {
+		return 1
+	}
+	// amd64:`CMPQ\t.*\(SP\), [$]7`
+	if t.x64 == 7 {
+		return 1
+	}
+	return 0
+}
+
+func compMem3(x, y *int) (int, bool) {
+	// We can do comparisons of a register with memory even if
+	// the register is used subsequently.
+	r := *x
+	// amd64:`CMPQ\t\(`
+	// 386:`CMPL\t\(`
+	return r, r < *y
+}
+
+// The following functions test that indexed load/store operations get generated.
+
+func idxInt8(x, y []int8, i int) {
+	var t int8
+	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	//   386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	y[i+1] = t
+	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	//   386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	x[i+1] = 77
+}
+
+func idxInt16(x, y []int16, i int) {
+	var t int16
+	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	y[i+1] = t
+	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	//   386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	y[16*i+1] = t
+	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	x[i+1] = 77
+	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	//   386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	x[16*i+1] = 77
+}
+
+func idxInt32(x, y []int32, i int) {
+	var t int32
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	y[i+1] = t
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	t = x[2*i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[2*i+1] = t
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	//   386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	y[16*i+1] = t
+	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+1] = 77
+	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	//   386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	x[16*i+1] = 77
+}
+
+func idxInt64(x, y []int64, i int) {
+	var t int64
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[i+1] = t
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	y[16*i+1] = t
+	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+1] = 77
+	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	x[16*i+1] = 77
+}
+
+func idxFloat32(x, y []float32, i int) {
+	var t float32
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	//    arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+`
+	t = x[i+1]
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	//    arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)`
+	y[i+1] = t
+	//    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	t = x[16*i+1]
+	//    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	y[16*i+1] = t
+}
+
+func idxFloat64(x, y []float64, i int) {
+	var t float64
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	//    arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+`
+	t = x[i+1]
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	//    arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)`
+	y[i+1] = t
+	//    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	t = x[16*i+1]
+	//    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	y[16*i+1] = t
+}
+
+func idxLoadPlusOp32(x []int32, i int) int32 {
+	s := x[0]
+	// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `ADDL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s += x[i+1]
+	// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `SUBL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s -= x[i+2]
+	// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	s *= x[i+3]
+	// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `ANDL\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s &= x[i+4]
+	// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `ORL\t20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s |= x[i+5]
+	// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+	// amd64: `XORL\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	s ^= x[i+6]
+	return s
+}
+
+func idxLoadPlusOp64(x []int64, i int) int64 {
+	s := x[0]
+	// amd64: `ADDQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s += x[i+1]
+	// amd64: `SUBQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s -= x[i+2]
+	// amd64: `ANDQ\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s &= x[i+3]
+	// amd64: `ORQ\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s |= x[i+4]
+	// amd64: `XORQ\t40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	s ^= x[i+5]
+	return s
+}
+
+func idxStorePlusOp32(x []int32, i int, v int32) { // 386 and amd64 should apply each read-modify-write directly to the indexed (scale 4) memory operand, first with register v and then with the constant 77
+	// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ADDL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+1] += v
+	// 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `SUBL\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+2] -= v
+	// 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ANDL\t[A-Z]+[0-9]*, 12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+3] &= v
+	// 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ORL\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+4] |= v
+	// 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `XORL\t[A-Z]+[0-9]*, 20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+5] ^= v
+
+	// 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ADDL\t[$]77, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+6] += 77
+	// 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ANDL\t[$]77, 28\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+7] &= 77
+	// 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `ORL\t[$]77, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+8] |= 77
+	// 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
+	// amd64: `XORL\t[$]77, 36\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+9] ^= 77
+}
+
+func idxStorePlusOp64(x []int64, i int, v int64) { // amd64 should apply each read-modify-write directly to the indexed (scale 8) memory operand, first with register v and then with the constant 77
+	// amd64: `ADDQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+1] += v
+	// amd64: `SUBQ\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+2] -= v
+	// amd64: `ANDQ\t[A-Z]+[0-9]*, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+3] &= v
+	// amd64: `ORQ\t[A-Z]+[0-9]*, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+4] |= v
+	// amd64: `XORQ\t[A-Z]+[0-9]*, 40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+5] ^= v
+
+	// amd64: `ADDQ\t[$]77, 48\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+6] += 77
+	// amd64: `ANDQ\t[$]77, 56\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+7] &= 77
+	// amd64: `ORQ\t[$]77, 64\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+8] |= 77
+	// amd64: `XORQ\t[$]77, 72\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+9] ^= 77
+}
+
+func idxCompare(i int) int { // amd64 should fetch each compared element with one indexed load whose displacement carries the constant part of the index; the 16*i cases accept scale 1 or the element size (x8..x64 are declared outside this function)
+	// amd64: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	if x8[i+1] < x8[0] {
+		return 0
+	}
+	// amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	if x16[i+1] < x16[0] {
+		return 0
+	}
+	// amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	if x16[16*i+1] < x16[0] {
+		return 0
+	}
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	if x32[i+1] < x32[0] {
+		return 0
+	}
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	if x32[16*i+1] < x32[0] {
+		return 0
+	}
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	if x64[i+1] < x64[0] {
+		return 0
+	}
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	if x64[16*i+1] < x64[0] {
+		return 0
+	}
+	// amd64: `MOVBLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	if x8[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	if x16[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	if x16[16*i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	if x32[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	if x32[16*i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	if x64[i+2] < 77 {
+		return 0
+	}
+	// amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	if x64[16*i+2] < 77 {
+		return 0
+	}
+	return 1
+}
+
+func idxFloatOps(a []float64, b []float32, i int) (float64, float32) { // amd64 should feed each SSE arithmetic op straight from an indexed memory operand (scale 8 for float64, scale 4 for float32)
+	c := float64(7)
+	// amd64: `ADDSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c += a[i+1]
+	// amd64: `SUBSD\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c -= a[i+2]
+	// amd64: `MULSD\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c *= a[i+3]
+	// amd64: `DIVSD\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	c /= a[i+4]
+
+	d := float32(8)
+	// amd64: `ADDSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d += b[i+1]
+	// amd64: `SUBSS\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d -= b[i+2]
+	// amd64: `MULSS\t12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d *= b[i+3]
+	// amd64: `DIVSS\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	d /= b[i+4]
+	return c, d
+}
diff --git a/test/codegen/noextend.go b/test/codegen/noextend.go
new file mode 100644
index 0000000..424fd20
--- /dev/null
+++ b/test/codegen/noextend.go
@@ -0,0 +1,253 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var sval64 [8]int64 // signed (sval*) and unsigned (val*) package-level arrays shared by the tests in this file
+var sval32 [8]int32
+var sval16 [8]int16
+var sval8 [8]int8
+var val64 [8]uint64
+var val32 [8]uint32
+var val16 [8]uint16
+var val8 [8]uint8
+
+// ----------------------------- //
+//    avoid zero/sign extensions //
+// ----------------------------- //
+
+func set16(x8 int8, u8 uint8, y8 int8, z8 uint8) { // ppc64 and ppc64le must not emit a register-to-register MOVB or MOVBZ, nor ANDCC, when storing 8-bit values as 16-bit
+	// Truncate not needed, load does sign/zero extend
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval16[0] = int16(x8)
+
+	// ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(u8)
+
+	// AND not needed due to size
+	// ppc64:-"ANDCC"
+	// ppc64le:-"ANDCC"
+	sval16[1] = 255 & int16(x8+y8)
+
+	// ppc64:-"ANDCC"
+	// ppc64le:-"ANDCC"
+	val16[1] = 255 & uint16(u8+z8)
+
+}
+func shiftidx(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) { // ppc64 and ppc64le must not emit the listed register-to-register extension moves when a shifted narrow value indexes a global array
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval16[0] = int16(val16[x8>>1])
+
+	// ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(sval16[u8>>2])
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	sval16[1] = int16(val16[x16>>1])
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val16[1] = uint16(sval16[u16>>2])
+
+}
+
+func setnox(x8 int8, u8 uint8, y8 int8, z8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) { // ppc64 and ppc64le must not emit register-to-register extension moves or ANDCC for these widening stores to 16, 32 and 64-bit globals
+	// Truncate not needed due to sign/zero extension on load
+
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval16[0] = int16(x8)
+
+	// ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(u8)
+
+	// AND not needed due to size
+	// ppc64:-"ANDCC"
+	// ppc64le:-"ANDCC"
+	sval16[1] = 255 & int16(x8+y8)
+
+	// ppc64:-"ANDCC"
+	// ppc64le:-"ANDCC"
+	val16[1] = 255 & uint16(u8+z8)
+
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval32[0] = int32(x8)
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	sval32[1] = int32(x16)
+
+	//ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	//ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val32[0] = uint32(u8)
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val32[1] = uint32(u16)
+
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval64[0] = int64(x8)
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	sval64[1] = int64(x16)
+
+	// ppc64:-"MOVW\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
+	sval64[2] = int64(x32)
+
+	//ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	//ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val64[0] = uint64(u8)
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val64[1] = uint64(u16)
+
+	// ppc64:-"MOVWZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
+	val64[2] = uint64(u32)
+}
+
+func cmp16(x8 int8, u8 uint8, x32 int32, u32 uint32, x64 int64, u64 uint64) bool { // ppc64 and ppc64le must not emit the listed register-to-register extension moves when comparing converted arguments with the 16-bit globals
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	if int16(x8) == sval16[0] {
+		return true
+	}
+
+	// ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint16(u8) == val16[0] {
+		return true
+	}
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint16(u32>>16) == val16[0] {
+		return true
+	}
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint16(u64>>48) == val16[0] {
+		return true
+	}
+
+	// Verify the truncates are using the correct sign.
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if int16(x32) == sval16[0] {
+		return true
+	}
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if uint16(u32) == val16[0] {
+		return true
+	}
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if int16(x64) == sval16[0] {
+		return true
+	}
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if uint16(u64) == val16[0] {
+		return true
+	}
+
+	return false
+}
+
+func cmp32(x8 int8, u8 uint8, x16 int16, u16 uint16, x64 int64, u64 uint64) bool { // ppc64 and ppc64le must not emit the listed register-to-register extension moves when comparing converted arguments with the 32-bit globals
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	if int32(x8) == sval32[0] {
+		return true
+	}
+
+	// ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint32(u8) == val32[0] {
+		return true
+	}
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if int32(x16) == sval32[0] {
+		return true
+	}
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint32(u16) == val32[0] {
+		return true
+	}
+
+	// Verify the truncates are using the correct sign.
+	// ppc64:-"MOVWZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
+	if int32(x64) == sval32[0] {
+		return true
+	}
+
+	// ppc64:-"MOVW\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
+	if uint32(u64) == val32[0] {
+		return true
+	}
+
+	return false
+}
+
+func cmp64(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) bool { // ppc64 and ppc64le must not emit the listed register-to-register extension moves when comparing widened arguments with the 64-bit globals
+	// ppc64:-"MOVB\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	if int64(x8) == sval64[0] {
+		return true
+	}
+
+	// ppc64:-"MOVBZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint64(u8) == val64[0] {
+		return true
+	}
+
+	// ppc64:-"MOVH\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if int64(x16) == sval64[0] {
+		return true
+	}
+
+	// ppc64:-"MOVHZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint64(u16) == val64[0] {
+		return true
+	}
+
+	// ppc64:-"MOVW\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
+	if int64(x32) == sval64[0] {
+		return true
+	}
+
+	// ppc64:-"MOVWZ\tR\\d+,\\sR\\d+"
+	// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
+	if uint64(u32) == val64[0] {
+		return true
+	}
+	return false
+}
diff --git a/test/codegen/race.go b/test/codegen/race.go
new file mode 100644
index 0000000..b977823
--- /dev/null
+++ b/test/codegen/race.go
@@ -0,0 +1,22 @@
+// asmcheck -race
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Check that we elide racefuncenter/racefuncexit for
+// functions with no calls (but which might panic
+// in various ways). See issue 31219.
+// amd64:-"CALL.*racefuncenter.*"
+// arm64:-"CALL.*racefuncenter.*"
+// ppc64le:-"CALL.*racefuncenter.*"
+func RaceMightPanic(a []int, i, j, k, s int) { // the checks above attach to this line; every statement below can panic but makes no ordinary call
+	var b [4]int
+	_ = b[i]     // panicIndex
+	_ = a[i:j]   // panicSlice
+	_ = a[i:j:k] // also panicSlice
+	_ = i << s   // panicShift
+	_ = i / j    // panicDivide
+}
diff --git a/test/codegen/regabi_regalloc.go b/test/codegen/regabi_regalloc.go
new file mode 100644
index 0000000..a7b7bd5
--- /dev/null
+++ b/test/codegen/regabi_regalloc.go
@@ -0,0 +1,23 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+//go:registerparams
+func f1(a, b int) { // amd64 should move BX to CX, AX to BX and load the constant 1 into AX, without any MOVQ involving DX
+	// amd64:"MOVQ\tBX, CX", "MOVQ\tAX, BX", "MOVL\t\\$1, AX", -"MOVQ\t.*DX"
+	g(1, a, b)
+}
+
+//go:registerparams
+func f2(a, b int) { // amd64 should copy BX to AX and AX or BX to CX, and never MOVQ anything into BX
+	// amd64:"MOVQ\tBX, AX", "MOVQ\t[AB]X, CX", -"MOVQ\t.*, BX"
+	g(b, b, b)
+}
+
+//go:noinline
+//go:registerparams
+func g(int, int, int) {} // empty non-inlined callee taking three ints, called by f1 and f2
diff --git a/test/codegen/retpoline.go b/test/codegen/retpoline.go
new file mode 100644
index 0000000..15d6a26
--- /dev/null
+++ b/test/codegen/retpoline.go
@@ -0,0 +1,14 @@
+// +build amd64
+// asmcheck -gcflags=-spectre=ret
+
+package codegen
+
+func CallFunc(f func()) { // compiled with -spectre=ret; the call through a func value should go via runtime.retpoline on amd64
+	// amd64:`CALL\truntime.retpoline`
+	f()
+}
+
+func CallInterface(x interface{ M() }) { // compiled with -spectre=ret; the interface method call should go via runtime.retpoline on amd64
+	// amd64:`CALL\truntime.retpoline`
+	x.M()
+}
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go
new file mode 100644
index 0000000..519cc83
--- /dev/null
+++ b/test/codegen/rotate.go
@@ -0,0 +1,213 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+// ------------------- //
+//    const rotates    //
+// ------------------- //
+
+func rot64(x uint64) uint64 { // constant 64-bit rotates spelled with |, + and ^ should each become a rotate instruction on the listed architectures
+	var a uint64
+
+	// amd64:"ROLQ\t[$]7"
+	// ppc64:"ROTL\t[$]7"
+	// ppc64le:"ROTL\t[$]7"
+	a += x<<7 | x>>57
+
+	// amd64:"ROLQ\t[$]8"
+	// arm64:"ROR\t[$]56"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
+	// ppc64:"ROTL\t[$]8"
+	// ppc64le:"ROTL\t[$]8"
+	a += x<<8 + x>>56
+
+	// amd64:"ROLQ\t[$]9"
+	// arm64:"ROR\t[$]55"
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
+	// ppc64:"ROTL\t[$]9"
+	// ppc64le:"ROTL\t[$]9"
+	a += x<<9 ^ x>>55
+
+	// s390x:"RISBGZ\t[$]0, [$]63, [$]7, "
+	// arm64:"ROR\t[$]57" // TODO this is not great line numbering, but then again, the instruction did appear
+	return a
+}
+
+func rot32(x uint32) uint32 { // constant 32-bit rotates spelled with |, + and ^ should each become a rotate (or rotated-operand move on arm) on the listed architectures
+	var a uint32
+
+	// amd64:"ROLL\t[$]7"
+	// arm:"MOVW\tR\\d+@>25"
+	// ppc64:"ROTLW\t[$]7"
+	// ppc64le:"ROTLW\t[$]7"
+	a += x<<7 | x>>25
+
+	// amd64:`ROLL\t[$]8`
+	// arm:"MOVW\tR\\d+@>24"
+	// arm64:"RORW\t[$]24"
+	// s390x:"RLL\t[$]8"
+	// ppc64:"ROTLW\t[$]8"
+	// ppc64le:"ROTLW\t[$]8"
+	a += x<<8 + x>>24
+
+	// amd64:"ROLL\t[$]9"
+	// arm:"MOVW\tR\\d+@>23"
+	// arm64:"RORW\t[$]23"
+	// s390x:"RLL\t[$]9"
+	// ppc64:"ROTLW\t[$]9"
+	// ppc64le:"ROTLW\t[$]9"
+	a += x<<9 ^ x>>23
+
+	// s390x:"RLL\t[$]7"
+	// arm64:"RORW\t[$]25" // TODO this is not great line numbering, but then again, the instruction did appear
+	return a
+}
+
+func rot16(x uint16) uint16 { // constant 16-bit rotates spelled with |, + and ^ should each become ROLW on amd64
+	var a uint16
+
+	// amd64:"ROLW\t[$]7"
+	a += x<<7 | x>>9
+
+	// amd64:`ROLW\t[$]8`
+	a += x<<8 + x>>8
+
+	// amd64:"ROLW\t[$]9"
+	a += x<<9 ^ x>>7
+
+	return a
+}
+
+func rot8(x uint8) uint8 { // constant 8-bit rotates spelled with |, + and ^ should each become ROLB on amd64
+	var a uint8
+
+	// amd64:"ROLB\t[$]5"
+	a += x<<5 | x>>3
+
+	// amd64:`ROLB\t[$]6`
+	a += x<<6 + x>>2
+
+	// amd64:"ROLB\t[$]7"
+	a += x<<7 ^ x>>1
+
+	return a
+}
+
+// ----------------------- //
+//    non-const rotates    //
+// ----------------------- //
+
+func rot64nc(x uint64, z uint) uint64 { // variable-count 64-bit rotates, with z first masked to 0..63, should become rotate instructions
+	var a uint64
+
+	z &= 63
+
+	// amd64:"ROLQ"
+	// ppc64:"ROTL"
+	// ppc64le:"ROTL"
+	a += x<<z | x>>(64-z)
+
+	// amd64:"RORQ"
+	a += x>>z | x<<(64-z)
+
+	return a
+}
+
+func rot32nc(x uint32, z uint) uint32 { // variable-count 32-bit rotates, with z first masked to 0..31, should become rotate instructions
+	var a uint32
+
+	z &= 31
+
+	// amd64:"ROLL"
+	// ppc64:"ROTLW"
+	// ppc64le:"ROTLW"
+	a += x<<z | x>>(32-z)
+
+	// amd64:"RORL"
+	a += x>>z | x<<(32-z)
+
+	return a
+}
+
+func rot16nc(x uint16, z uint) uint16 { // variable-count 16-bit rotates, with z first masked to 0..15, should become ROLW and RORW on amd64
+	var a uint16
+
+	z &= 15
+
+	// amd64:"ROLW"
+	a += x<<z | x>>(16-z)
+
+	// amd64:"RORW"
+	a += x>>z | x<<(16-z)
+
+	return a
+}
+
+func rot8nc(x uint8, z uint) uint8 { // variable-count 8-bit rotates, with z first masked to 0..7, should become ROLB and RORB on amd64
+	var a uint8
+
+	z &= 7
+
+	// amd64:"ROLB"
+	a += x<<z | x>>(8-z)
+
+	// amd64:"RORB"
+	a += x>>z | x<<(8-z)
+
+	return a
+}
+
+// Issue 18254: rotate after inlining
+func f32(x uint32) uint32 { // calls rot32nc with the constant 7 and expects a constant-count ROLL on amd64
+	// amd64:"ROLL\t[$]7"
+	return rot32nc(x, 7)
+}
+
+// --------------------------------------- //
+//    Combined Rotate + Masking operations //
+// --------------------------------------- //
+
+func checkMaskedRotate32(a []uint32, r int) { // ppc64 and ppc64le should express each 32-bit rotate plus mask as one RLWNM, for constant and variable (r) rotate counts
+	i := 0
+
+	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
+	i++
+	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
+	i++
+	// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
+	// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
+	i++
+	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
+	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
+	i++
+
+	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
+	i++
+	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
+	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[3], r) & 0xFF00
+	i++
+
+	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
+	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF
+	i++
+	// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
+	// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
+	a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF
+	i++
+}
diff --git a/test/codegen/select.go b/test/codegen/select.go
new file mode 100644
index 0000000..4426924
--- /dev/null
+++ b/test/codegen/select.go
@@ -0,0 +1,20 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func f() { // amd64 must not emit a MOVQ of constant 0 into autotmp_3 for the select inside the loop
+	ch1 := make(chan int)
+	ch2 := make(chan int)
+	for {
+		// amd64:-`MOVQ\t[$]0, ""..autotmp_3`
+		select {
+		case <-ch1:
+		case <-ch2:
+		default:
+		}
+	}
+}
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
new file mode 100644
index 0000000..06f6f12
--- /dev/null
+++ b/test/codegen/shift.go
@@ -0,0 +1,303 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// ------------------ //
+//   masked shifts    //
+// ------------------ //
+
+func lshMask64x64(v int64, s uint64) int64 { // left shift by a count masked with 63; checks which instructions the mask makes unnecessary
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v << (s & 63)
+}
+
+func rshMask64Ux64(v uint64, s uint64) uint64 { // logical right shift by a count masked with 63; checks which instructions the mask makes unnecessary
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v >> (s & 63)
+}
+
+func rshMask64x64(v int64, s uint64) int64 { // arithmetic right shift by a count masked with 63; checks which instructions the mask makes unnecessary
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v >> (s & 63)
+}
+
+func lshMask32x64(v int32, s uint64) int32 { // 32-bit left shift by a count masked with 63; ppc64 still expects ISEL here but no ORN
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ISEL",-"ORN"
+	// ppc64:"ISEL",-"ORN"
+	return v << (s & 63)
+}
+
+func rshMask32Ux64(v uint32, s uint64) uint32 { // 32-bit logical right shift by a count masked with 63; ppc64 still expects ISEL here but no ORN
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ISEL",-"ORN"
+	// ppc64:"ISEL",-"ORN"
+	return v >> (s & 63)
+}
+
+func rshMask32x64(v int32, s uint64) int32 { // 32-bit arithmetic right shift by a count masked with 63; ppc64 still expects ISEL here but no ORN
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ISEL",-"ORN"
+	// ppc64:"ISEL",-"ORN"
+	return v >> (s & 63)
+}
+
+func lshMask64x32(v int64, s uint32) int64 { // 64-bit left shift by a 32-bit count masked with 63
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN"
+	// ppc64:"ANDCC",-"ORN"
+	return v << (s & 63)
+}
+
+func rshMask64Ux32(v uint64, s uint32) uint64 { // 64-bit logical right shift by a 32-bit count masked with 63
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN"
+	// ppc64:"ANDCC",-"ORN"
+	return v >> (s & 63)
+}
+
+func rshMask64x32(v int64, s uint32) int64 { // 64-bit arithmetic right shift by a 32-bit count masked with 63
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v >> (s & 63)
+}
+
+func lshMask64x32Ext(v int64, s int32) int64 { // 64-bit left shift by a signed 32-bit count that is masked with 63 and then converted to uint
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v << uint(s&63)
+}
+
+func rshMask64Ux32Ext(v uint64, s int32) uint64 { // 64-bit logical right shift by a signed 32-bit count that is masked with 63 and then converted to uint
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v >> uint(s&63)
+}
+
+func rshMask64x32Ext(v int64, s int32) int64 { // 64-bit arithmetic right shift by a signed 32-bit count that is masked with 63 and then converted to uint
+	// s390x:-"RISBGZ",-"AND",-"LOCGR"
+	// ppc64le:"ANDCC",-"ORN",-"ISEL"
+	// ppc64:"ANDCC",-"ORN",-"ISEL"
+	return v >> uint(s&63)
+}
+
+// --------------- //
+//  signed shifts  //
+// --------------- //
+
+// We do want to generate a test + panicshift for these cases.
+func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) { // shifts x by signed counts of each width; amd64 should emit the TEST matching the count width
+	// amd64:"TESTB"
+	_ = x << v8
+	// amd64:"TESTW"
+	_ = x << v16
+	// amd64:"TESTL"
+	_ = x << v32
+	// amd64:"TESTQ"
+	_ = x << v64
+}
+
+// We want to avoid generating a test + panicshift for these cases.
+func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) { // each signed count is masked with a non-negative constant first, so amd64 should emit no TEST
+	// amd64:-"TESTB"
+	_ = x << (v8 & 7)
+	// amd64:-"TESTW"
+	_ = x << (v16 & 15)
+	// amd64:-"TESTL"
+	_ = x << (v32 & 31)
+	// amd64:-"TESTQ"
+	_ = x << (v64 & 63)
+}
+
+// ------------------ //
+//   bounded shifts   //
+// ------------------ //
+
+func rshGuarded64(v int64, s uint) int64 { // arithmetic right shift guarded by s < 64; panics otherwise
+	if s < 64 {
+		// s390x:-"RISBGZ",-"AND",-"LOCGR"
+		// wasm:-"Select",-".*LtU"
+		return v >> s
+	}
+	panic("shift too large")
+}
+
+func rshGuarded64U(v uint64, s uint) uint64 { // logical right shift guarded by s < 64; panics otherwise
+	if s < 64 {
+		// s390x:-"RISBGZ",-"AND",-"LOCGR"
+		// wasm:-"Select",-".*LtU"
+		return v >> s
+	}
+	panic("shift too large")
+}
+
+func lshGuarded64(v int64, s uint) int64 { // left shift guarded by s < 64; panics otherwise
+	if s < 64 {
+		// s390x:-"RISBGZ",-"AND",-"LOCGR"
+		// wasm:-"Select",-".*LtU"
+		return v << s
+	}
+	panic("shift too large")
+}
+
+func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) { // ppc64 and ppc64le should use CLRLSLDI or CLRLSLWI and none of the instructions listed as negative checks
+
+	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f := tab[byte(v)^b]
+	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[byte(v)&b]
+	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[byte(v)|b]
+	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[uint16(v)&h]
+	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[uint16(v)^h]
+	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+	f += tab[uint16(v)|h]
+	// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
+	// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
+	f += tab[v&0xff]
+	// ppc64le:-".*AND",".*CLRLSLWI"
+	// ppc64:-".*AND",".*CLRLSLWI"
+	f += 2 * uint32(uint16(d))
+	// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
+	// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
+	g := 2 * uint64(uint32(d))
+	return f, g
+}
+
+func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { // ppc64 and ppc64le mask-then-shift cases; each check says whether CLRLSLWI, CLRLSLDI or EXTSWSLI is expected or forbidden
+
+	// ppc64le:-"AND","CLRLSLWI"
+	// ppc64:-"AND","CLRLSLWI"
+	f := (v8 & 0xF) << 2
+	// ppc64le:"CLRLSLWI"
+	// ppc64:"CLRLSLWI"
+	f += byte(v16) << 3
+	// ppc64le:-"AND","CLRLSLWI"
+	// ppc64:-"AND","CLRLSLWI"
+	g := (v16 & 0xFF) << 3
+	// ppc64le:-"AND","CLRLSLWI"
+	// ppc64:-"AND","CLRLSLWI"
+	h := (v32 & 0xFFFFF) << 2
+	// ppc64le:"CLRLSLDI"
+	// ppc64:"CLRLSLDI"
+	i := (v64 & 0xFFFFFFFF) << 5
+	// ppc64le:-"CLRLSLDI"
+	// ppc64:-"CLRLSLDI"
+	i += (v64 & 0xFFFFFFF) << 38
+	// ppc64le/power9:-"CLRLSLDI"
+	// ppc64/power9:-"CLRLSLDI"
+	i += (v64 & 0xFFFF00) << 10
+	// ppc64le/power9:-"SLD","EXTSWSLI"
+	// ppc64/power9:-"SLD","EXTSWSLI"
+	j := int64(x32+32) * 8
+	return f, g, h, i, j
+}
+
+func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // ppc64le should emit the sign-extending move only when the shift leaves more bits than the narrower target type holds
+
+	// ppc64le:-".*MOVW"
+	f := int32(v >> 32)
+	// ppc64le:".*MOVW"
+	f += int32(v >> 31)
+	// ppc64le:-".*MOVH"
+	g := int16(v >> 48)
+	// ppc64le:".*MOVH"
+	g += int16(v >> 30)
+	// ppc64le:-".*MOVH"
+	g += int16(f >> 16)
+	// ppc64le:-".*MOVB"
+	h := int8(v >> 56)
+	// ppc64le:".*MOVB"
+	h += int8(v >> 28)
+	// ppc64le:-".*MOVB"
+	h += int8(f >> 24)
+	// ppc64le:".*MOVB"
+	h += int8(f >> 16)
+	return int64(h), uint64(g)
+}
+
+func checkShiftAndMask32(v []uint32) { // ppc64 and ppc64le should turn each mask and shift pair into one RLWNM, or into a MOVW from R0 where the expression is always zero
+	i := 0
+
+	// ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
+	// ppc64: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
+	v[i] = (v[i] & 0xFF00000) >> 8
+	i++
+	// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	v[i] = (v[i] & 0xFF00) >> 6
+	i++
+	// ppc64le: "MOVW\tR0"
+	// ppc64: "MOVW\tR0"
+	v[i] = (v[i] & 0xFF) >> 8
+	i++
+	// ppc64le: "MOVW\tR0"
+	// ppc64: "MOVW\tR0"
+	v[i] = (v[i] & 0xF000000) >> 28
+	i++
+	// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
+	// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
+	v[i] = (v[i] >> 6) & 0xFF
+	i++
+	// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
+	// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
+	v[i] = (v[i] >> 6) & 0xFF000
+	i++
+	// ppc64le: "MOVW\tR0"
+	// ppc64: "MOVW\tR0"
+	v[i] = (v[i] >> 20) & 0xFF000
+	i++
+	// ppc64le: "MOVW\tR0"
+	// ppc64: "MOVW\tR0"
+	v[i] = (v[i] >> 24) & 0xFF00
+	i++
+}
+
+func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) { // ppc64 and ppc64le should compute each array index with a single RLWNM and without CLRLSLDI or SLD
+	//ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	//ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
+	a[0] = a[uint8(v>>24)]
+	//ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	//ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	b[0] = b[uint8(v>>24)]
+	//ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	//ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
+	b[1] = b[(v>>20)&0xFF]
+	//ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
+	//ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
+	b[2] = b[v>>25]
+}
+
+// 128 bit shifts
+
+func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) { // combines x and y shifted by complementary counts; amd64 should use the three-operand SHRQ and SHLQ forms
+	s := bits & 63
+	ŝ := (64 - bits) & 63
+	// check that the shift operation has two commas (three operands)
+	// amd64:"SHRQ.*,.*,"
+	shr := x>>s | y<<ŝ
+	// amd64:"SHLQ.*,.*,"
+	shl := x<<s | y>>ŝ
+	return shr, shl
+}
diff --git a/test/codegen/shortcircuit.go b/test/codegen/shortcircuit.go
new file mode 100644
index 0000000..e971dca
--- /dev/null
+++ b/test/codegen/shortcircuit.go
@@ -0,0 +1,17 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func efaceExtract(e interface{}) int { // returns the int held in e, or 0 when e does not hold an int
+	// This should be compiled with only
+	// a single conditional jump.
+	// amd64:-"JMP"
+	if x, ok := e.(int); ok {
+		return x
+	}
+	return 0
+}
diff --git a/test/codegen/slices.go b/test/codegen/slices.go
new file mode 100644
index 0000000..d20aa9e
--- /dev/null
+++ b/test/codegen/slices.go
@@ -0,0 +1,370 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// slice types.
+
+// ------------------ //
+//      Clear         //
+// ------------------ //
+
+// Issue #5373 optimize memset idiom
+
+func SliceClear(s []int) []int { // the zeroing loop over a pointer-free slice should become a memclrNoHeapPointers call on amd64
+	// amd64:`.*memclrNoHeapPointers`
+	for i := range s {
+		s[i] = 0
+	}
+	return s
+}
+
+func SliceClearPointers(s []*int) []*int { // the nil-ing loop over a pointer slice should become a memclrHasPointers call on amd64
+	// amd64:`.*memclrHasPointers`
+	for i := range s {
+		s[i] = nil
+	}
+	return s
+}
+
+// ------------------ //
+//      Extension     //
+// ------------------ //
+
+// Issue #21266 - avoid makeslice in append(x, make([]T, y)...)
+
+func SliceExtensionConst(s []int) []int { // appending a constant-length make should use memclrNoHeapPointers on amd64, with no makeslice call and no panicmakeslicelen
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, 1<<2)...)
+}
+
+func SliceExtensionConstInt64(s []int) []int { // same as SliceExtensionConst but the constant length is typed int64
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, int64(1<<2))...)
+}
+
+func SliceExtensionConstUint64(s []int) []int { // same as SliceExtensionConst but the constant length is typed uint64
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, uint64(1<<2))...)
+}
+
+func SliceExtensionConstUint(s []int) []int { // same as SliceExtensionConst but the constant length is typed uint
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:-`.*runtime\.panicmakeslicelen`
+	return append(s, make([]int, uint(1<<2))...)
+}
+
+func SliceExtensionPointer(s []*int, l int) []*int { // extending a pointer slice by l elements should use memclrHasPointers on amd64 and no makeslice call
+	// amd64:`.*runtime\.memclrHasPointers`
+	// amd64:-`.*runtime\.makeslice`
+	return append(s, make([]*int, l)...)
+}
+
+func SliceExtensionVar(s []byte, l int) []byte { // extending a byte slice by a variable int length should use memclrNoHeapPointers on amd64 and no makeslice call
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarInt64(s []byte, l int64) []byte { // with an int64 length amd64 still avoids makeslice but is expected to reference panicmakeslicelen
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:`.*runtime\.panicmakeslicelen`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarUint64(s []byte, l uint64) []byte { // with a uint64 length amd64 still avoids makeslice but is expected to reference panicmakeslicelen
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:`.*runtime\.panicmakeslicelen`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarUint(s []byte, l uint) []byte { // with a uint length amd64 still avoids makeslice but is expected to reference panicmakeslicelen
+	// amd64:`.*runtime\.memclrNoHeapPointers`
+	// amd64:-`.*runtime\.makeslice`
+	// amd64:`.*runtime\.panicmakeslicelen`
+	return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionInt64(s []int, l64 int64) []int { // on 386 an int64 length is expected to keep the makeslice call and use no memclr
+	// 386:`.*runtime\.makeslice`
+	// 386:-`.*runtime\.memclr`
+	return append(s, make([]int, l64)...)
+}
+
+// ------------------ //
+//      Make+Copy     //
+// ------------------ //
+
+// Issue #26252 - avoid memclr for make+copy
+
+func SliceMakeCopyLen(s []int) []int { // make of len(s) followed by copy of a pointer-free slice should become mallocgc plus memmove on amd64, with no makeslice call
+	// amd64:`.*runtime\.mallocgc`
+	// amd64:`.*runtime\.memmove`
+	// amd64:-`.*runtime\.makeslice`
+	a := make([]int, len(s))
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyLenPtr(s []*int) []*int { // make of len(s) followed by copy of a pointer slice should become one makeslicecopy call on amd64
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.makeslice\(`
+	// amd64:-`.*runtime\.typedslicecopy`
+	a := make([]*int, len(s))
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyConst(s []int) []int { // constant-length make followed by copy should become makeslicecopy on amd64, with no separate makeslice or memmove call
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.makeslice\(`
+	// amd64:-`.*runtime\.memmove`
+	a := make([]int, 4)
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyConstPtr(s []*int) []*int { // constant-length make followed by copy of a pointer slice should become one makeslicecopy call on amd64
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.makeslice\(`
+	// amd64:-`.*runtime\.typedslicecopy`
+	a := make([]*int, 4)
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoDeref(s []*int) []*int { // the make result is stored through a pointer, so makeslice and typedslicecopy are expected instead of makeslicecopy
+	a := new([]*int)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	*a = make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(*a, s)
+	return *a
+}
+
+func SliceMakeCopyNoOptNoVar(s []*int) []*int { // the make result is stored into a slice element rather than a plain variable, so makeslicecopy is not expected
+	a := make([][]*int, 1)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a[0] = make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a[0], s)
+	return a[0]
+}
+
+func SliceMakeCopyNoOptBlank(s []*int) []*int { // the make result is discarded and copy targets the nil slice a, so makeslicecopy is not expected
+	var a []*int
+	// amd64:-`.*runtime\.makeslicecopy`
+	_ = make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoMake(s []*int) []*int { // the destination comes from new rather than make, so makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.objectnew`
+	a := *new([]*int)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoHeapAlloc(s []*int) int { // only cap(a) is returned, so a presumably needs no heap allocation (TODO confirm); makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	a := make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return cap(a)
+}
+
+func SliceMakeCopyNoOptNoCap(s []*int) []*int { // make is given length 0 and a separate capacity 4, so makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 0, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.typedslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptNoCopy(s []*int) []*int { // copy is shadowed by a local no-op function, so this is not the builtin copy and makeslicecopy is not expected
+	copy := func(x, y []*int) {}
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoOptWrongOrder(s []*int) []*int { // the made slice is the copy source rather than the destination, so makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(s, a)
+	return a
+}
+
+func SliceMakeCopyNoOptWrongAssign(s []*int) []*int { // make is assigned to s while the copy destination is the nil slice a, so makeslicecopy is not expected
+	var a []*int
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	s = make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, s)
+	return s
+}
+
+func SliceMakeCopyNoOptCopyLength(s []*int) (int, []*int) { // the count returned by copy is used, so makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	n := copy(a, s)
+	return n, a
+}
+
+func SliceMakeCopyNoOptSelfCopy(s []*int) []*int { // copy source and destination are the same slice, so makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, a)
+	return a
+}
+
+func SliceMakeCopyNoOptTargetReference(s []*int) []*int { // the copy source expression refers to the destination via len(a), so makeslicecopy is not expected
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]*int, 4)
+	// amd64:`.*runtime\.typedslicecopy`
+	// amd64:-`.*runtime\.makeslicecopy`
+	copy(a, s[:len(a)])
+	return a
+}
+
+func SliceMakeCopyNoOptCap(s []int) []int { // make is given an explicit capacity of 9, so makeslice plus memmove are expected instead of makeslicecopy
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.makeslice\(`
+	a := make([]int, len(s), 9)
+	// amd64:-`.*runtime\.makeslicecopy`
+	// amd64:`.*runtime\.memmove`
+	copy(a, s)
+	return a
+}
+
+func SliceMakeCopyNoMemmoveDifferentLen(s []int) []int { // the make length is len(s)-1; makeslicecopy is expected and no separate memmove call
+	// amd64:`.*runtime\.makeslicecopy`
+	// amd64:-`.*runtime\.memmove`
+	a := make([]int, len(s)-1)
+	// amd64:-`.*runtime\.memmove`
+	copy(a, s)
+	return a
+}
+
+// ---------------------- //
+//   Nil check of &s[0]   //
+// ---------------------- //
+// See issue 30366
+func SliceNilCheck(s []int) { // amd64 must not emit a TESTB when dereferencing the pointer taken from &s[0]
+	p := &s[0]
+	// amd64:-`TESTB`
+	_ = *p
+}
+
+// ---------------------- //
+//   Init slice literal   //
+// ---------------------- //
+// See issue 21561
+func InitSmallSliceLiteral() []int { // amd64 should initialise the one-element literal with an immediate MOVQ of 42
+	// amd64:`MOVQ\t[$]42`
+	return []int{42}
+}
+
+func InitNotSmallSliceLiteral() []int {
+	// amd64:`LEAQ\t.*stmp_`
+	return []int{
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+		42,
+	}
+}
+
+// --------------------------------------- //
+//   Test PPC64 SUBFCconst folding rules   //
+//   triggered by slice operations.        //
+// --------------------------------------- //
+
+func SliceWithConstCompare(a []int, b int) []int { // ppc64 and ppc64le must not emit NEG when slicing the five-element literal c from b
+	var c []int = []int{1, 2, 3, 4, 5}
+	if b+len(a) < len(c) {
+		// ppc64le:-"NEG"
+		// ppc64:-"NEG"
+		return c[b:]
+	}
+	return a
+}
+
+func SliceWithSubtractBound(a []int, b int) []int { // ppc64 and ppc64le should use SUBC and no NEG when slicing from the bound 3 - b
+	// ppc64le:"SUBC",-"NEG"
+	// ppc64:"SUBC",-"NEG"
+	return a[(3 - b):]
+}
diff --git a/test/codegen/smallintiface.go b/test/codegen/smallintiface.go
new file mode 100644
index 0000000..0207a0a
--- /dev/null
+++ b/test/codegen/smallintiface.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+package codegen
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+func booliface() interface{} { // amd64 must load the address runtime.staticuint64s+8 when true is converted to an interface
+	// amd64:`LEAQ\truntime.staticuint64s\+8\(SB\)`
+	return true
+}
+
+func smallint8iface() interface{} { // amd64 must load runtime.staticuint64s+2024; 2024 is 8*253, presumably 253 being int8(-3) as a byte (TODO confirm)
+	// amd64:`LEAQ\truntime.staticuint64s\+2024\(SB\)`
+	return int8(-3)
+}
+
+func smalluint8iface() interface{} { // amd64 must load the address runtime.staticuint64s+24 when uint8(3) is converted to an interface
+	// amd64:`LEAQ\truntime.staticuint64s\+24\(SB\)`
+	return uint8(3)
+}
diff --git a/test/codegen/spectre.go b/test/codegen/spectre.go
new file mode 100644
index 0000000..d845da3
--- /dev/null
+++ b/test/codegen/spectre.go
@@ -0,0 +1,38 @@
+// +build amd64
+// asmcheck -gcflags=-spectre=index
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func IndexArray(x *[10]int, i int) int { // with the -spectre=index flag from the file header, indexing the array must emit CMOVQCC
+	// amd64:`CMOVQCC`
+	return x[i]
+}
+
+func IndexString(x string, i int) byte { // with the -spectre=index flag from the file header, indexing the string must emit CMOVQLS
+	// amd64:`CMOVQLS`
+	return x[i]
+}
+
+func IndexSlice(x []float64, i int) float64 { // with the -spectre=index flag from the file header, indexing the slice must emit CMOVQLS
+	// amd64:`CMOVQLS`
+	return x[i]
+}
+
+func SliceArray(x *[10]int, i, j int) []int { // with the -spectre=index flag from the file header, slicing the array must emit CMOVQHI
+	// amd64:`CMOVQHI`
+	return x[i:j]
+}
+
+func SliceString(x string, i, j int) string { // with the -spectre=index flag from the file header, slicing the string must emit CMOVQHI
+	// amd64:`CMOVQHI`
+	return x[i:j]
+}
+
+func SliceSlice(x []float64, i, j int) []float64 { // with the -spectre=index flag from the file header, reslicing must emit CMOVQHI
+	// amd64:`CMOVQHI`
+	return x[i:j]
+}
diff --git a/test/codegen/stack.go b/test/codegen/stack.go
new file mode 100644
index 0000000..f28b4a3
--- /dev/null
+++ b/test/codegen/stack.go
@@ -0,0 +1,120 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "runtime"
+
+// This file contains code generation tests related to the use of the
+// stack.
+
+// Check that stack stores are optimized away.
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]-4-"
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64:"TEXT\t.*, [$]0-"
+// ppc64le:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func StackStore() int { // reads a zero-valued local through its address; frame size is checked by the TEXT patterns preceding this function
+	var x int
+	return *(&x)
+}
+
+type T struct {
+	A, B, C, D int // exported fields, the ones KeepWanted copies back
+	x, y, z    int // unexported fields, the ones KeepWanted leaves out and so resets
+}
+
+// Check that large structs are cleared directly (issue #24416).
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64:"TEXT\t.*, [$]0-"
+// ppc64le:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ZeroLargeStruct(x *T) { // assigns a zero T through x; frame size is checked by the TEXT patterns preceding this function
+	t := T{}
+	*x = t
+}
+
+// Check that structs are partially initialised directly (issue #24386).
+
+// Notes:
+// - 386 fails due to spilling a register
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// ppc64:"TEXT\t.*, [$]0-"
+// ppc64le:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+// Note that 386 currently has to spill a register.
+func KeepWanted(t *T) { // rewrites *t keeping A, B, C and D; x, y and z are omitted from the literal and so become zero
+	*t = T{A: t.A, B: t.B, C: t.C, D: t.D}
+}
+
+// Check that small array operations avoid using the stack (issue #15925).
+
+// Notes:
+// - 386 fails due to spilling a register
+// - arm & mips fail due to softfloat calls
+// amd64:"TEXT\t.*, [$]0-"
+// arm64:"TEXT\t.*, [$]0-"
+// ppc64:"TEXT\t.*, [$]0-"
+// ppc64le:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ArrayAdd64(a, b [4]float64) [4]float64 { // element-wise sum of two [4]float64 values, returned by value
+	return [4]float64{a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]}
+}
+
+// Check that small array initialization avoids using the stack.
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64:"TEXT\t.*, [$]0-"
+// ppc64le:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ArrayInit(i, j int) [4]int { // returns the array with i and j at indexes 0 and 2 and constant zeros at 1 and 3
+	return [4]int{i, 0, j, 0}
+}
+
+// Check that assembly output has matching offset and base register
+// (issue #21064).
+
+func check_asmout(b [2]int) int { // the load of b[1] must be printed as b+24(SP) on amd64 and b+4(FP) on arm
+	runtime.GC() // use some frame
+	// amd64:`.*b\+24\(SP\)`
+	// arm:`.*b\+4\(FP\)`
+	return b[1]
+}
+
+// Check that simple functions get promoted to nosplit, even when
+// they might panic in various ways. See issue 31219.
+// amd64:"TEXT\t.*NOSPLIT.*"
+func MightPanic(a []int, i, j, k, s int) {
+	_ = a[i]     // index expression, panicIndex
+	_ = a[i:j]   // two-index slice, panicSlice
+	_ = a[i:j:k] // three-index slice, also panicSlice
+	_ = i << s   // shift by a signed count, panicShift
+	_ = i / j    // integer division, panicDivide
+}
+
+// Put a defer in a loop, so that the second defer (after the loop) is not open-coded.
+func Defer() { // the final defer, after the loop, must call runtime.deferprocStack on amd64
+	for i := 0; i < 2; i++ {
+		defer func() {}()
+	}
+	// amd64:`CALL\truntime\.deferprocStack`
+	defer func() {}()
+}
diff --git a/test/codegen/strings.go b/test/codegen/strings.go
new file mode 100644
index 0000000..19e1dbd
--- /dev/null
+++ b/test/codegen/strings.go
@@ -0,0 +1,65 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// string types.
+
+func CountRunes(s string) int { // Issue #24923; the length of the rune conversion must reference countrunes on amd64
+	// amd64:`.*countrunes`
+	return len([]rune(s))
+}
+
+func ToByteSlice() []byte { // Issue #24698; amd64 must allocate a [3]uint8 via newobject and must not reference stringtoslicebyte
+	// amd64:`LEAQ\ttype\.\[3\]uint8`
+	// amd64:`CALL\truntime\.newobject`
+	// amd64:-`.*runtime.stringtoslicebyte`
+	return []byte("foo")
+}
+
+// Loading from read-only symbols should get transformed into constants.
+func ConstantLoad() { // each conversion below must show the string bytes as immediate constants in the listed instructions
+	// 12592 = 0x3130
+	//    50 = 0x32
+	// amd64:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(`
+	//   386:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(`
+	//   arm:`MOVW\t\$48`,`MOVW\t\$49`,`MOVW\t\$50`
+	// arm64:`MOVD\t\$12592`,`MOVD\t\$50`
+	//  wasm:`I64Const\t\$12592`,`I64Store16\t\$0`,`I64Const\t\$50`,`I64Store8\t\$2`
+	// mips64:`MOVV\t\$48`,`MOVV\t\$49`,`MOVV\t\$50`
+	bsink = []byte("012")
+
+	// 858927408 = 0x33323130
+	//     13620 = 0x3534
+	// amd64:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(`
+	//   386:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(`
+	// arm64:`MOVD\t\$858927408`,`MOVD\t\$13620`
+	//  wasm:`I64Const\t\$858927408`,`I64Store32\t\$0`,`I64Const\t\$13620`,`I64Store16\t\$4`
+	bsink = []byte("012345")
+
+	// 3978425819141910832 = 0x3736353433323130
+	// 7306073769690871863 = 0x6564636261393837
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVQ\t\$7306073769690871863`
+	//   386:`MOVL\t\$858927408, \(`,`DUFFCOPY`
+	// arm64:`MOVD\t\$3978425819141910832`,`MOVD\t\$1650538808`,`MOVD\t\$25699`,`MOVD\t\$101`
+	//  wasm:`I64Const\t\$3978425819141910832`,`I64Store\t\$0`,`I64Const\t\$7306073769690871863`,`I64Store\t\$7`
+	bsink = []byte("0123456789abcde")
+
+	// 56 = 0x38
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVB\t\$56`
+	bsink = []byte("012345678")
+
+	// 14648 = 0x3938
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVW\t\$14648`
+	bsink = []byte("0123456789")
+
+	// 1650538808 = 0x62613938
+	// amd64:`MOVQ\t\$3978425819141910832`,`MOVL\t\$1650538808`
+	bsink = []byte("0123456789ab")
+}
+
+var bsink []byte // package-level variable that ConstantLoad assigns each converted slice to
diff --git a/test/codegen/structs.go b/test/codegen/structs.go
new file mode 100644
index 0000000..c4bcb55
--- /dev/null
+++ b/test/codegen/structs.go
@@ -0,0 +1,46 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// struct types.
+
+// ------------- //
+//    Zeroing    //
+// ------------- //
+
+type Z1 struct { // three int fields; zeroed by Zero1
+	a, b, c int
+}
+
+func Zero1(t *Z1) { // Issue #18370; amd64 must zero *t with a MOVUPS from an X register plus a MOVQ of 0 at offset 16
+	// amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
+	*t = Z1{}
+}
+
+type Z2 struct { // three *int fields; zeroed by Zero2
+	a, b, c *int
+}
+
+func Zero2(t *Z2) { // amd64 must emit the MOVUPS and MOVQ zeroing stores and also reference runtime.gcWriteBarrier
+	// amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
+	// amd64:`.*runtime[.]gcWriteBarrier.*\(SB\)`
+	*t = Z2{}
+}
+
+// ------------------ //
+//    Initializing    //
+// ------------------ //
+
+type I1 struct { // four int fields; initialized by Init1
+	a, b, c, d int
+}
+
+func Init1(p *I1) { // Issue #18872; amd64 must emit MOVQ instructions with the immediates 1, 2, 3 and 4
+	// amd64:`MOVQ\t[$]1`,`MOVQ\t[$]2`,`MOVQ\t[$]3`,`MOVQ\t[$]4`
+	*p = I1{1, 2, 3, 4}
+}
diff --git a/test/codegen/switch.go b/test/codegen/switch.go
new file mode 100644
index 0000000..2ac817d
--- /dev/null
+++ b/test/codegen/switch.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check code generation of switch statements.
+
+package codegen
+
+// see issue 33934
+func f(x string) int { // the string switch below must not reference cmpstring on amd64
+	// amd64:-`cmpstring`
+	switch x {
+	case "":
+		return -1
+	case "1", "2", "3":
+		return -2
+	default:
+		return -3
+	}
+}
diff --git a/test/codegen/zerosize.go b/test/codegen/zerosize.go
new file mode 100644
index 0000000..292c5a0
--- /dev/null
+++ b/test/codegen/zerosize.go
@@ -0,0 +1,25 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure a pointer variable and a zero-sized variable
+// aren't allocated to the same stack slot.
+// See issue 24993.
+
+package codegen
+
+func zeroSize() { // amd64 must zero s at s+56(SP) and, on the channel send, take an address at +55(SP), so the two offsets differ
+	c := make(chan struct{})
+	// amd64:`MOVQ\t\$0, ""\.s\+56\(SP\)`
+	var s *int
+	// force s to be a stack object, also use some (fixed) stack space
+	g(&s, 1, 2, 3, 4, 5)
+
+	// amd64:`LEAQ\t""\..*\+55\(SP\)`
+	c <- struct{}{}
+}
+
+//go:noinline
+func g(**int, int, int, int, int, int) {} // empty non-inlined callee; zeroSize passes it the address of s