path: root/test/codegen
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-16 19:25:22 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-16 19:25:22 +0000
commit    f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch)
tree      7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /test/codegen
parent    Initial commit. (diff)
Adding upstream version 1.22.1. (upstream/1.22.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'test/codegen')
-rw-r--r--  test/codegen/README  152
-rw-r--r--  test/codegen/addrcalc.go  14
-rw-r--r--  test/codegen/alloc.go  34
-rw-r--r--  test/codegen/arithmetic.go  605
-rw-r--r--  test/codegen/bitfield.go  368
-rw-r--r--  test/codegen/bits.go  420
-rw-r--r--  test/codegen/bmi.go  209
-rw-r--r--  test/codegen/bool.go  276
-rw-r--r--  test/codegen/clobberdead.go  35
-rw-r--r--  test/codegen/clobberdeadreg.go  33
-rw-r--r--  test/codegen/compare_and_branch.go  244
-rw-r--r--  test/codegen/comparisons.go  803
-rw-r--r--  test/codegen/condmove.go  453
-rw-r--r--  test/codegen/constants.go  33
-rw-r--r--  test/codegen/copy.go  159
-rw-r--r--  test/codegen/floats.go  158
-rw-r--r--  test/codegen/fuse.go  197
-rw-r--r--  test/codegen/ifaces.go  27
-rw-r--r--  test/codegen/issue22703.go  535
-rw-r--r--  test/codegen/issue25378.go  22
-rw-r--r--  test/codegen/issue31618.go  22
-rw-r--r--  test/codegen/issue33580.go  25
-rw-r--r--  test/codegen/issue38554.go  15
-rw-r--r--  test/codegen/issue42610.go  28
-rw-r--r--  test/codegen/issue48054.go  31
-rw-r--r--  test/codegen/issue52635.go  41
-rw-r--r--  test/codegen/issue54467.go  59
-rw-r--r--  test/codegen/issue56440.go  34
-rw-r--r--  test/codegen/issue58166.go  23
-rw-r--r--  test/codegen/issue60324.go  36
-rw-r--r--  test/codegen/issue60673.go  18
-rw-r--r--  test/codegen/issue61356.go  55
-rw-r--r--  test/codegen/issue63332.go  14
-rw-r--r--  test/codegen/logic.go  41
-rw-r--r--  test/codegen/mapaccess.go  484
-rw-r--r--  test/codegen/maps.go  201
-rw-r--r--  test/codegen/math.go  253
-rw-r--r--  test/codegen/mathbits.go  869
-rw-r--r--  test/codegen/memcombine.go  920
-rw-r--r--  test/codegen/memops.go  403
-rw-r--r--  test/codegen/memops_bigoffset.go  71
-rw-r--r--  test/codegen/noextend.go  285
-rw-r--r--  test/codegen/race.go  22
-rw-r--r--  test/codegen/regabi_regalloc.go  23
-rw-r--r--  test/codegen/retpoline.go  43
-rw-r--r--  test/codegen/rotate.go  259
-rw-r--r--  test/codegen/select.go  20
-rw-r--r--  test/codegen/shift.go  476
-rw-r--r--  test/codegen/shortcircuit.go  17
-rw-r--r--  test/codegen/slices.go  426
-rw-r--r--  test/codegen/smallintiface.go  22
-rw-r--r--  test/codegen/spectre.go  39
-rw-r--r--  test/codegen/stack.go  115
-rw-r--r--  test/codegen/strings.go  80
-rw-r--r--  test/codegen/structs.go  48
-rw-r--r--  test/codegen/switch.go  185
-rw-r--r--  test/codegen/writebarrier.go  55
-rw-r--r--  test/codegen/zerosize.go  25
58 files changed, 10560 insertions, 0 deletions
diff --git a/test/codegen/README b/test/codegen/README
new file mode 100644
index 0000000..19a73d0
--- /dev/null
+++ b/test/codegen/README
@@ -0,0 +1,152 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+The codegen directory contains code generation tests for the gc
+compiler.
+
+
+- Introduction
+
+The test harness compiles Go code inside files in this directory and
+matches the generated assembly (the output of `go tool compile -S`)
+against a set of regexps to be specified in comments that follow a
+special syntax (described below). The test driver is implemented as
+an action within the GOROOT/test test suite, called "asmcheck".
+
+The codegen harness is part of the all.bash test suite, but for
+performance reasons only the codegen tests for the host machine's
+GOARCH are enabled by default, and only on GOOS=linux.
+
+To perform comprehensive tests for all the supported architectures
+(even on a non-Linux system), one can run the following command:
+
+ $ ../../bin/go test cmd/internal/testdir -run='Test/codegen' -all_codegen -v
+
+This is recommended after any change that affects the compiler's code.
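+
+For quicker iteration during development, the -all_codegen flag can be
+dropped to run only the default subset for the host architecture:
+
+ $ ../../bin/go test cmd/internal/testdir -run='Test/codegen' -v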
+
+The test harness compiles the tests with the same go toolchain that is
+used to run the test. After writing tests for a newly added codegen
+transformation, it can be useful to first run the test harness with a
+toolchain from a released Go version (and verify that the new tests
+fail), and then re-run the tests using the devel toolchain.
+
+
+- Regexp comment syntax
+
+Instructions to match are specified inside plain comments that start
+with an architecture tag, followed by a colon and a quoted Go-style
+regexp to be matched. For example, the following test:
+
+ func Sqrt(x float64) float64 {
+ // amd64:"SQRTSD"
+ // arm64:"FSQRTD"
+ return math.Sqrt(x)
+ }
+
+verifies that math.Sqrt calls are intrinsified to a SQRTSD instruction
+on amd64, and to a FSQRTD instruction on arm64.
+
+It is possible to put checks for multiple architectures on the same
+line, as:
+
+ // amd64:"SQRTSD" arm64:"FSQRTD"
+
+although this form should be avoided when doing so would make the line
+of regexps excessively long and difficult to read.
+
+Comments that are on their own line will be matched against the first
+subsequent non-comment line. Inline comments are also supported; the
+regexp will be matched against the code found on the same line:
+
+ func Sqrt(x float64) float64 {
+ return math.Sqrt(x) // arm:"SQRTD"
+ }
+
+It's possible to specify a comma-separated list of regexps to be
+matched. For example, the following test:
+
+ func TZ8(n uint8) int {
+ // amd64:"BSFQ","ORQ\t\\$256"
+ return bits.TrailingZeros8(n)
+ }
+
+verifies that the code generated for a bits.TrailingZeros8 call on
+amd64 contains both a "BSFQ" instruction and an "ORQ $256".
+
+Note how the ORQ regexp includes a tab char (\t). In the Go assembly
+syntax, operands are separated from opcodes by a tab.
+
+Regexps can be quoted using either " or `. Special characters must be
+escaped accordingly. Both of these are accepted, and equivalent:
+
+ // amd64:"ADDQ\t\\$3"
+ // amd64:`ADDQ\t\$3`
+
+and they'll match this assembly line:
+
+ ADDQ $3
+
+Negative matches can be specified using a - before the quoted regexp.
+For example:
+
+ func MoveSmall() {
+ x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+ copy(x[1:], x[:]) // arm64:-".*memmove"
+ }
+
+verifies that NO memmove call is present in the assembly generated for
+the copy() line.
+
+
+- Architecture specifiers
+
+There are three different ways to specify on which architecture a test
+should be run:
+
+* Specify only the architecture (eg: "amd64"). This indicates that the
+ check should be run on all the supported architecture variants. For
+ instance, arm checks will be run against all supported GOARM
+ variations (5,6,7).
+* Specify both the architecture and a variant, separated by a slash
+ (eg: "arm/7"). This means that the check will be run only on that
+ specific variant.
+* Specify the operating system, the architecture and the variant,
+ separated by slashes (eg: "plan9/386/sse2", "plan9/amd64/"). This is
+ needed in the rare case that you need to do a codegen test affected
+ by a specific operating system; by default, tests are compiled only
+ targeting linux.
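+
+For example (an illustrative sketch, not an actual test in this
+directory), the following checks would be run on every amd64 variant,
+only on GOARM=7, and only when targeting plan9/amd64, respectively:
+
+ func Sqrt(x float64) float64 {
+ // amd64:"SQRTSD"
+ // arm/7:"SQRTD"
+ // plan9/amd64/:"SQRTSD"
+ return math.Sqrt(x)
+ }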
+
+
+- Remarks and Caveats
+
+-- Write small test functions
+
+As a general guideline, test functions should be small, to avoid
+possible interactions between unrelated lines of code that may be
+introduced, for example, by the compiler's optimization passes.
+
+Any given line of Go code could get assigned more instructions than
+might appear from reading the source. In particular, matching all MOV
+instructions should be avoided; the compiler may add them for
+unrelated reasons and this may render the test ineffective.
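+
+For instance (an illustrative sketch, not an actual test in this
+directory), when checking that a conversion uses a sign-extending
+load, it is more robust to match the specific opcode:
+
+ func signExtend(p *int32) int64 {
+ // amd64:"MOVLQSX"
+ return int64(*p)
+ }
+
+than to match a generic "MOV" pattern, which could also match
+unrelated loads and stores inserted by the compiler.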
+
+-- Line matching logic
+
+Regexps are always matched from the start of the instruction line.
+This means, for example, that the "MULQ" regexp is equivalent to
+"^MULQ" (^ representing the start of the line), and it will NOT match
+the following assembly line:
+
+ IMULQ $99, AX
+
+To force a match at any point of the line, ".*MULQ" should be used.
+
+For the same reason, a negative regexp like -"memmove" is not enough
+to make sure that no memmove call is included in the assembly. A
+memmove call looks like this:
+
+ CALL runtime.memmove(SB)
+
+To make sure that the "memmove" symbol does not appear anywhere in the
+assembly, the negative regexp to be used is -".*memmove".
diff --git a/test/codegen/addrcalc.go b/test/codegen/addrcalc.go
new file mode 100644
index 0000000..45552d2
--- /dev/null
+++ b/test/codegen/addrcalc.go
@@ -0,0 +1,14 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Make sure we use ADDQ instead of LEAQ when we can.
+
+func f(p *[4][2]int, x int) *int {
+ // amd64:"ADDQ",-"LEAQ"
+ return &p[x][0]
+}
diff --git a/test/codegen/alloc.go b/test/codegen/alloc.go
new file mode 100644
index 0000000..31455fd
--- /dev/null
+++ b/test/codegen/alloc.go
@@ -0,0 +1,34 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check that allocating a 0-size object does not
+// introduce a call to runtime.newobject.
+
+package codegen
+
+func zeroAllocNew1() *struct{} {
+ // 386:-`CALL\truntime\.newobject`
+ // amd64:-`CALL\truntime\.newobject`
+ // arm:-`CALL\truntime\.newobject`
+ // arm64:-`CALL\truntime\.newobject`
+ return new(struct{})
+}
+
+func zeroAllocNew2() *[0]int {
+ // 386:-`CALL\truntime\.newobject`
+ // amd64:-`CALL\truntime\.newobject`
+ // arm:-`CALL\truntime\.newobject`
+ // arm64:-`CALL\truntime\.newobject`
+ return new([0]int)
+}
+
+func zeroAllocSliceLit() []int {
+ // 386:-`CALL\truntime\.newobject`
+ // amd64:-`CALL\truntime\.newobject`
+ // arm:-`CALL\truntime\.newobject`
+ // arm64:-`CALL\truntime\.newobject`
+ return []int{}
+}
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
new file mode 100644
index 0000000..174c2db
--- /dev/null
+++ b/test/codegen/arithmetic.go
@@ -0,0 +1,605 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to arithmetic
+// simplifications and optimizations on integer types.
+// For codegen tests on float types, see floats.go.
+
+// ----------------- //
+// Addition //
+// ----------------- //
+
+func AddLargeConst(a uint64, out []uint64) {
+ // ppc64x/power10:"ADD\t[$]4294967296,"
+ // ppc64x/power9:"MOVD\t[$]1", "SLD\t[$]32", "ADD\tR[0-9]*"
+ // ppc64x/power8:"MOVD\t[$]1", "SLD\t[$]32", "ADD\tR[0-9]*"
+ out[0] = a + 0x100000000
+ // ppc64x/power10:"ADD\t[$]-8589934592,"
+ // ppc64x/power9:"MOVD\t[$]-1", "SLD\t[$]33", "ADD\tR[0-9]*"
+ // ppc64x/power8:"MOVD\t[$]-1", "SLD\t[$]33", "ADD\tR[0-9]*"
+ out[1] = a + 0xFFFFFFFE00000000
+}
+
+// ----------------- //
+// Subtraction //
+// ----------------- //
+
+var ef int
+
+func SubMem(arr []int, b, c, d int) int {
+ // 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
+ // amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
+ arr[2] -= b
+ // 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)`
+ // amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)`
+ arr[3] -= b
+ // 386:`DECL\s16\([A-Z]+\)`
+ arr[4]--
+ // 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
+ arr[5] -= 20
+ // 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
+ ef -= arr[b]
+ // 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
+ arr[c] -= b
+ // 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
+ arr[d] -= 15
+ // 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)`
+ arr[b]--
+ // amd64:`DECQ\s64\([A-Z]+\)`
+ arr[8]--
+ // 386:"SUBL\t4"
+ // amd64:"SUBQ\t8"
+ return arr[0] - arr[1]
+}
+
+func SubFromConst(a int) int {
+ // ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
+ b := 40 - a
+ return b
+}
+
+func SubFromConstNeg(a int) int {
+ // ppc64x: `ADD\t[$]40,\sR[0-9]+,\sR`
+ c := 40 - (-a)
+ return c
+}
+
+func SubSubFromConst(a int) int {
+ // ppc64x: `ADD\t[$]20,\sR[0-9]+,\sR`
+ c := 40 - (20 - a)
+ return c
+}
+
+func AddSubFromConst(a int) int {
+ // ppc64x: `SUBC\tR[0-9]+,\s[$]60,\sR`
+ c := 40 + (20 - a)
+ return c
+}
+
+func NegSubFromConst(a int) int {
+ // ppc64x: `ADD\t[$]-20,\sR[0-9]+,\sR`
+ c := -(20 - a)
+ return c
+}
+
+func NegAddFromConstNeg(a int) int {
+ // ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR`
+ c := -(-40 + a)
+ return c
+}
+
+func SubSubNegSimplify(a, b int) int {
+ // amd64:"NEGQ"
+ // ppc64x:"NEG"
+ r := (a - b) - a
+ return r
+}
+
+func SubAddSimplify(a, b int) int {
+ // amd64:-"SUBQ",-"ADDQ"
+ // ppc64x:-"SUB",-"ADD"
+ r := a + (b - a)
+ return r
+}
+
+func SubAddSimplify2(a, b, c int) (int, int, int, int, int, int) {
+ // amd64:-"ADDQ"
+ r := (a + b) - (a + c)
+ // amd64:-"ADDQ"
+ r1 := (a + b) - (c + a)
+ // amd64:-"ADDQ"
+ r2 := (b + a) - (a + c)
+ // amd64:-"ADDQ"
+ r3 := (b + a) - (c + a)
+ // amd64:-"SUBQ"
+ r4 := (a - c) + (c + b)
+ // amd64:-"SUBQ"
+ r5 := (a - c) + (b + c)
+ return r, r1, r2, r3, r4, r5
+}
+
+func SubAddNegSimplify(a, b int) int {
+ // amd64:"NEGQ",-"ADDQ",-"SUBQ"
+ // ppc64x:"NEG",-"ADD",-"SUB"
+ r := a - (b + a)
+ return r
+}
+
+func AddAddSubSimplify(a, b, c int) int {
+ // amd64:-"SUBQ"
+ // ppc64x:-"SUB"
+ r := a + (b + (c - a))
+ return r
+}
+
+// -------------------- //
+// Multiplication //
+// -------------------- //
+
+func Pow2Muls(n1, n2 int) (int, int) {
+ // amd64:"SHLQ\t[$]5",-"IMULQ"
+ // 386:"SHLL\t[$]5",-"IMULL"
+ // arm:"SLL\t[$]5",-"MUL"
+ // arm64:"LSL\t[$]5",-"MUL"
+ // ppc64x:"SLD\t[$]5",-"MUL"
+ a := n1 * 32
+
+ // amd64:"SHLQ\t[$]6",-"IMULQ"
+ // 386:"SHLL\t[$]6",-"IMULL"
+ // arm:"SLL\t[$]6",-"MUL"
+ // arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
+ // ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
+ b := -64 * n2
+
+ return a, b
+}
+
+func Mul_96(n int) int {
+ // amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ`
+ // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
+ // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+ // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+ // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
+ return n * 96
+}
+
+func Mul_n120(n int) int {
+ // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
+ return n * -120
+}
+
+func MulMemSrc(a []uint32, b []float32) {
+ // 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
+ a[0] *= a[1]
+ // 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
+ // amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+`
+ b[0] *= b[1]
+}
+
+// Multiplications merging tests
+
+func MergeMuls1(n int) int {
+ // amd64:"IMUL3Q\t[$]46"
+ // 386:"IMUL3L\t[$]46"
+ // ppc64x:"MULLD\t[$]46"
+ return 15*n + 31*n // 46n
+}
+
+func MergeMuls2(n int) int {
+ // amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
+ // 386:"IMUL3L\t[$]23","ADDL\t[$]29"
+ // ppc64x/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
+ // ppc64x/power8:"MULLD\t[$]23","ADD\t[$]29"
+ return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
+}
+
+func MergeMuls3(a, n int) int {
+ // amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19"
+ // 386:"ADDL\t[$]19",-"IMULL\t[$]19"
+ // ppc64x:"ADD\t[$]19",-"MULLD\t[$]19"
+ return a*n + 19*n // (a+19)n
+}
+
+func MergeMuls4(n int) int {
+ // amd64:"IMUL3Q\t[$]14"
+ // 386:"IMUL3L\t[$]14"
+ // ppc64x:"MULLD\t[$]14"
+ return 23*n - 9*n // 14n
+}
+
+func MergeMuls5(a, n int) int {
+ // amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19"
+ // 386:"ADDL\t[$]-19",-"IMULL\t[$]19"
+ // ppc64x:"ADD\t[$]-19",-"MULLD\t[$]19"
+ return a*n - 19*n // (a-19)n
+}
+
+// -------------- //
+// Division //
+// -------------- //
+
+func DivMemSrc(a []float64) {
+ // 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
+ // amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+`
+ a[0] /= a[1]
+}
+
+func Pow2Divs(n1 uint, n2 int) (uint, int) {
+ // 386:"SHRL\t[$]5",-"DIVL"
+ // amd64:"SHRQ\t[$]5",-"DIVQ"
+ // arm:"SRL\t[$]5",-".*udiv"
+ // arm64:"LSR\t[$]5",-"UDIV"
+ // ppc64x:"SRD"
+ a := n1 / 32 // unsigned
+
+ // amd64:"SARQ\t[$]6",-"IDIVQ"
+ // 386:"SARL\t[$]6",-"IDIVL"
+ // arm:"SRA\t[$]6",-".*udiv"
+ // arm64:"ASR\t[$]6",-"SDIV"
+ // ppc64x:"SRAD"
+ b := n2 / 64 // signed
+
+ return a, b
+}
+
+// Check that constant divisions get turned into MULs
+func ConstDivs(n1 uint, n2 int) (uint, int) {
+ // amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
+ // 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
+ // arm64:`MOVD`,`UMULH`,-`DIV`
+ // arm:`MOVW`,`MUL`,-`.*udiv`
+ a := n1 / 17 // unsigned
+
+ // amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
+ // 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
+ // arm64:`SMULH`,-`DIV`
+ // arm:`MOVW`,`MUL`,-`.*udiv`
+ b := n2 / 17 // signed
+
+ return a, b
+}
+
+func FloatDivs(a []float32) float32 {
+ // amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
+ // 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+`
+ return a[1] / a[2]
+}
+
+func Pow2Mods(n1 uint, n2 int) (uint, int) {
+ // 386:"ANDL\t[$]31",-"DIVL"
+ // amd64:"ANDL\t[$]31",-"DIVQ"
+ // arm:"AND\t[$]31",-".*udiv"
+ // arm64:"AND\t[$]31",-"UDIV"
+ // ppc64x:"RLDICL"
+ a := n1 % 32 // unsigned
+
+ // 386:"SHRL",-"IDIVL"
+ // amd64:"SHRQ",-"IDIVQ"
+ // arm:"SRA",-".*udiv"
+ // arm64:"ASR",-"REM"
+ // ppc64x:"SRAD"
+ b := n2 % 64 // signed
+
+ return a, b
+}
+
+// Check that signed divisibility checks get converted to AND on low bits
+func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
+ // 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
+ // amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
+ // arm:"AND\t[$]63",-".*udiv",-"SRA"
+ // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
+ // ppc64x:"RLDICL",-"SRAD"
+ a := n1%64 == 0 // signed divisible
+
+ // 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
+ // amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
+ // arm:"AND\t[$]63",-".*udiv",-"SRA"
+ // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
+ // ppc64x:"RLDICL",-"SRAD"
+ b := n2%64 != 0 // signed indivisible
+
+ return a, b
+}
+
+// Check that constant modulo divs get turned into MULs
+func ConstMods(n1 uint, n2 int) (uint, int) {
+ // amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ"
+ // 386:"MOVL\t[$]-252645135","MULL",-"DIVL"
+ // arm64:`MOVD`,`UMULH`,-`DIV`
+ // arm:`MOVW`,`MUL`,-`.*udiv`
+ a := n1 % 17 // unsigned
+
+ // amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ"
+ // 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL"
+ // arm64:`SMULH`,-`DIV`
+ // arm:`MOVW`,`MUL`,-`.*udiv`
+ b := n2 % 17 // signed
+
+ return a, b
+}
+
+// Check that divisibility checks x%c==0 are converted to MULs and rotates
+func DivisibleU(n uint) (bool, bool) {
+ // amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
+ // 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVQ"
+ // arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
+ // arm:"MUL","CMP\t[$]715827882",-".*udiv"
+ // ppc64x:"MULLD","ROTL\t[$]63"
+ even := n%6 == 0
+
+ // amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
+ // 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVQ"
+ // arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV"
+ // arm:"MUL","CMP\t[$]226050910",-".*udiv"
+ // ppc64x:"MULLD",-"ROTL"
+ odd := n%19 == 0
+
+ return even, odd
+}
+
+func Divisible(n int) (bool, bool) {
+ // amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
+ // 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
+ // arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ADD\tR","ROR",-"DIV"
+ // arm:"MUL","ADD\t[$]715827882",-".*udiv"
+ // ppc64x/power8:"MULLD","ADD","ROTL\t[$]63"
+ // ppc64x/power9:"MADDLD","ROTL\t[$]63"
+ even := n%6 == 0
+
+ // amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
+ // 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
+ // arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV"
+ // arm:"MUL","ADD\t[$]113025455",-".*udiv"
+ // ppc64x/power8:"MULLD","ADD",-"ROTL"
+ // ppc64x/power9:"MADDLD",-"ROTL"
+ odd := n%19 == 0
+
+ return even, odd
+}
+
+// Check that fix-up code is not generated for divisions where it has been proven
+// that the divisor is not -1 or that the dividend is > MinIntNN.
+func NoFix64A(divr int64) (int64, int64) {
+ var d int64 = 42
+ var e int64 = 84
+ if divr > 5 {
+ d /= divr // amd64:-"JMP"
+ e %= divr // amd64:-"JMP"
+ // The following statement is to avoid conflict between the above check
+ // and the normal JMP generated at the end of the block.
+ d += e
+ }
+ return d, e
+}
+
+func NoFix64B(divd int64) (int64, int64) {
+ var d int64
+ var e int64
+ var divr int64 = -1
+ if divd > -9223372036854775808 {
+ d = divd / divr // amd64:-"JMP"
+ e = divd % divr // amd64:-"JMP"
+ d += e
+ }
+ return d, e
+}
+
+func NoFix32A(divr int32) (int32, int32) {
+ var d int32 = 42
+ var e int32 = 84
+ if divr > 5 {
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ d /= divr
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ e %= divr
+ d += e
+ }
+ return d, e
+}
+
+func NoFix32B(divd int32) (int32, int32) {
+ var d int32
+ var e int32
+ var divr int32 = -1
+ if divd > -2147483648 {
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ d = divd / divr
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ e = divd % divr
+ d += e
+ }
+ return d, e
+}
+
+func NoFix16A(divr int16) (int16, int16) {
+ var d int16 = 42
+ var e int16 = 84
+ if divr > 5 {
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ d /= divr
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ e %= divr
+ d += e
+ }
+ return d, e
+}
+
+func NoFix16B(divd int16) (int16, int16) {
+ var d int16
+ var e int16
+ var divr int16 = -1
+ if divd > -32768 {
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ d = divd / divr
+ // amd64:-"JMP"
+ // 386:-"JMP"
+ e = divd % divr
+ d += e
+ }
+ return d, e
+}
+
+// Check that len() and cap() calls divided by powers of two are
+// optimized into shifts and ands
+
+func LenDiv1(a []int) int {
+ // 386:"SHRL\t[$]10"
+ // amd64:"SHRQ\t[$]10"
+ // arm64:"LSR\t[$]10",-"SDIV"
+ // arm:"SRL\t[$]10",-".*udiv"
+ // ppc64x:"SRD\t[$]10"
+ return len(a) / 1024
+}
+
+func LenDiv2(s string) int {
+ // 386:"SHRL\t[$]11"
+ // amd64:"SHRQ\t[$]11"
+ // arm64:"LSR\t[$]11",-"SDIV"
+ // arm:"SRL\t[$]11",-".*udiv"
+ // ppc64x:"SRD\t[$]11"
+ return len(s) / (4097 >> 1)
+}
+
+func LenMod1(a []int) int {
+ // 386:"ANDL\t[$]1023"
+ // amd64:"ANDL\t[$]1023"
+ // arm64:"AND\t[$]1023",-"SDIV"
+ // arm/6:"AND",-".*udiv"
+ // arm/7:"BFC",-".*udiv",-"AND"
+ // ppc64x:"RLDICL"
+ return len(a) % 1024
+}
+
+func LenMod2(s string) int {
+ // 386:"ANDL\t[$]2047"
+ // amd64:"ANDL\t[$]2047"
+ // arm64:"AND\t[$]2047",-"SDIV"
+ // arm/6:"AND",-".*udiv"
+ // arm/7:"BFC",-".*udiv",-"AND"
+ // ppc64x:"RLDICL"
+ return len(s) % (4097 >> 1)
+}
+
+func CapDiv(a []int) int {
+ // 386:"SHRL\t[$]12"
+ // amd64:"SHRQ\t[$]12"
+ // arm64:"LSR\t[$]12",-"SDIV"
+ // arm:"SRL\t[$]12",-".*udiv"
+ // ppc64x:"SRD\t[$]12"
+ return cap(a) / ((1 << 11) + 2048)
+}
+
+func CapMod(a []int) int {
+ // 386:"ANDL\t[$]4095"
+ // amd64:"ANDL\t[$]4095"
+ // arm64:"AND\t[$]4095",-"SDIV"
+ // arm/6:"AND",-".*udiv"
+ // arm/7:"BFC",-".*udiv",-"AND"
+ // ppc64x:"RLDICL"
+ return cap(a) % ((1 << 11) + 2048)
+}
+
+func AddMul(x int) int {
+ // amd64:"LEAQ\t1"
+ return 2*x + 1
+}
+
+func MULA(a, b, c uint32) (uint32, uint32, uint32) {
+ // arm:`MULA`,-`MUL\s`
+ // arm64:`MADDW`,-`MULW`
+ r0 := a*b + c
+ // arm:`MULA`,-`MUL\s`
+ // arm64:`MADDW`,-`MULW`
+ r1 := c*79 + a
+ // arm:`ADD`,-`MULA`,-`MUL\s`
+ // arm64:`ADD`,-`MADD`,-`MULW`
+ // ppc64x:`ADD`,-`MULLD`
+ r2 := b*64 + c
+ return r0, r1, r2
+}
+
+func MULS(a, b, c uint32) (uint32, uint32, uint32) {
+ // arm/7:`MULS`,-`MUL\s`
+ // arm/6:`SUB`,`MUL\s`,-`MULS`
+ // arm64:`MSUBW`,-`MULW`
+ r0 := c - a*b
+ // arm/7:`MULS`,-`MUL\s`
+ // arm/6:`SUB`,`MUL\s`,-`MULS`
+ // arm64:`MSUBW`,-`MULW`
+ r1 := a - c*79
+ // arm/7:`SUB`,-`MULS`,-`MUL\s`
+ // arm64:`SUB`,-`MSUBW`,-`MULW`
+ // ppc64x:`SUB`,-`MULLD`
+ r2 := c - b*64
+ return r0, r1, r2
+}
+
+func addSpecial(a, b, c uint32) (uint32, uint32, uint32) {
+ // amd64:`INCL`
+ a++
+ // amd64:`DECL`
+ b--
+ // amd64:`SUBL.*-128`
+ c += 128
+ return a, b, c
+}
+
+// Divide -> shift rules usually require fixup for negative inputs.
+// If the input is non-negative, make sure the fixup is eliminated.
+func divInt(v int64) int64 {
+ if v < 0 {
+ return 0
+ }
+ // amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9,"
+ return v / 512
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(z + C) -x -> C + (z - x)" can optimize the following cases.
+func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
+ // arm64:"SUB","ADD\t[$]2"
+ // ppc64x:"SUB","ADD\t[$]2"
+ r0 := (i0 + 3) - (j0 + 1)
+ // arm64:"SUB","SUB\t[$]4"
+ // ppc64x:"SUB","ADD\t[$]-4"
+ r1 := (i1 - 3) - (j1 + 1)
+ // arm64:"SUB","ADD\t[$]4"
+ // ppc64x:"SUB","ADD\t[$]4"
+ r2 := (i2 + 3) - (j2 - 1)
+ // arm64:"SUB","SUB\t[$]2"
+ // ppc64x:"SUB","ADD\t[$]-2"
+ r3 := (i3 - 3) - (j3 - 1)
+ return r0, r1, r2, r3
+}
+
+// The reassociate rules "x - (z + C) -> (x - z) - C" and
+// "(C - z) - x -> C - (z + x)" can optimize the following cases.
+func constantFold2(i0, j0, i1, j1 int) (int, int) {
+ // arm64:"ADD","MOVD\t[$]2","SUB"
+ // ppc64x: `SUBC\tR[0-9]+,\s[$]2,\sR`
+ r0 := (3 - i0) - (j0 + 1)
+ // arm64:"ADD","MOVD\t[$]4","SUB"
+ // ppc64x: `SUBC\tR[0-9]+,\s[$]4,\sR`
+ r1 := (3 - i1) - (j1 - 1)
+ return r0, r1
+}
+
+func constantFold3(i, j int) int {
+ // arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
+ // ppc64x:"MULLD\t[$]30","MULLD"
+ r := (5 * i) * (6 * j)
+ return r
+}
diff --git a/test/codegen/bitfield.go b/test/codegen/bitfield.go
new file mode 100644
index 0000000..3276af3
--- /dev/null
+++ b/test/codegen/bitfield.go
@@ -0,0 +1,368 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to bit field
+// insertion/extraction simplifications/optimizations.
+
+func extr1(x, x2 uint64) uint64 {
+ return x<<7 + x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr2(x, x2 uint64) uint64 {
+ return x<<7 | x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr3(x, x2 uint64) uint64 {
+ return x<<7 ^ x2>>57 // arm64:"EXTR\t[$]57,"
+}
+
+func extr4(x, x2 uint32) uint32 {
+ return x<<7 + x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+func extr5(x, x2 uint32) uint32 {
+ return x<<7 | x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+func extr6(x, x2 uint32) uint32 {
+ return x<<7 ^ x2>>25 // arm64:"EXTRW\t[$]25,"
+}
+
+// check 32-bit shift masking
+func mask32(x uint32) uint32 {
+ return (x << 29) >> 29 // arm64:"AND\t[$]7, R[0-9]+",-"LSR",-"LSL"
+}
+
+// check 16-bit shift masking
+func mask16(x uint16) uint16 {
+ return (x << 14) >> 14 // arm64:"AND\t[$]3, R[0-9]+",-"LSR",-"LSL"
+}
+
+// check 8-bit shift masking
+func mask8(x uint8) uint8 {
+ return (x << 7) >> 7 // arm64:"AND\t[$]1, R[0-9]+",-"LSR",-"LSL"
+}
+
+func maskshift(x uint64) uint64 {
+ // arm64:"AND\t[$]4095, R[0-9]+",-"LSL",-"LSR",-"UBFIZ",-"UBFX"
+ return ((x << 5) & (0xfff << 5)) >> 5
+}
+
+// bitfield ops
+// bfi
+func bfi1(x, y uint64) uint64 {
+ // arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+ return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f)
+}
+
+func bfi2(x, y uint64) uint64 {
+ // arm64:"BFI\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
+ return (x << 24 >> 12) | (y & 0xfff0000000000fff)
+}
+
+// bfxil
+func bfxil1(x, y uint64) uint64 {
+ // arm64:"BFXIL\t[$]5, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+ return ((x >> 5) & 0xfff) | (y & 0xfffffffffffff000)
+}
+
+func bfxil2(x, y uint64) uint64 {
+ // arm64:"BFXIL\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
+ return (x << 12 >> 24) | (y & 0xffffff0000000000)
+}
+
+// sbfiz
+// merge shifts into sbfiz: (x << lc) >> rc && lc > rc.
+func sbfiz1(x int64) int64 {
+ // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
+ return (x << 4) >> 3
+}
+
+// merge shift and sign-extension into sbfiz.
+func sbfiz2(x int32) int64 {
+ return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]29",-"LSL"
+}
+
+func sbfiz3(x int16) int64 {
+ return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]13",-"LSL"
+}
+
+func sbfiz4(x int8) int64 {
+ return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]5",-"LSL"
+}
+
+// sbfiz combinations.
+// merge shift with sbfiz into sbfiz.
+func sbfiz5(x int32) int32 {
+ // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
+ return (x << 4) >> 3
+}
+
+func sbfiz6(x int16) int64 {
+ return int64(x+1) << 3 // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
+}
+
+func sbfiz7(x int8) int64 {
+ return int64(x+1) << 62 // arm64:"SBFIZ\t[$]62, R[0-9]+, [$]2",-"LSL"
+}
+
+func sbfiz8(x int32) int64 {
+ return int64(x+1) << 40 // arm64:"SBFIZ\t[$]40, R[0-9]+, [$]24",-"LSL"
+}
+
+// sbfx
+// merge shifts into sbfx: (x << lc) >> rc && lc <= rc.
+func sbfx1(x int64) int64 {
+ return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
+}
+
+func sbfx2(x int64) int64 {
+ return (x << 60) >> 60 // arm64:"SBFX\t[$]0, R[0-9]+, [$]4",-"LSL",-"ASR"
+}
+
+// merge shift and sign-extension into sbfx.
+func sbfx3(x int32) int64 {
+ return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]29",-"ASR"
+}
+
+func sbfx4(x int16) int64 {
+ return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]13",-"ASR"
+}
+
+func sbfx5(x int8) int64 {
+ return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]5",-"ASR"
+}
+
+func sbfx6(x int32) int64 {
+ return int64(x >> 30) // arm64:"SBFX\t[$]30, R[0-9]+, [$]2"
+}
+
+func sbfx7(x int16) int64 {
+ return int64(x >> 10) // arm64:"SBFX\t[$]10, R[0-9]+, [$]6"
+}
+
+func sbfx8(x int8) int64 {
+ return int64(x >> 5) // arm64:"SBFX\t[$]5, R[0-9]+, [$]3"
+}
+
+// sbfx combinations.
+// merge shifts with sbfiz into sbfx.
+func sbfx9(x int32) int32 {
+ return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
+}
+
+// merge sbfx and sign-extension into sbfx.
+func sbfx10(x int32) int64 {
+ c := x + 5
+ return int64(c >> 20) // arm64:"SBFX\t[$]20, R[0-9]+, [$]12",-"MOVW\tR[0-9]+, R[0-9]+"
+}
+
+// ubfiz
+// merge shifts into ubfiz: (x<<lc)>>rc && lc>rc
+func ubfiz1(x uint64) uint64 {
+ // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD"
+ return (x << 4) >> 3
+}
+
+// merge shift and zero-extension into ubfiz.
+func ubfiz2(x uint32) uint64 {
+ return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]32",-"LSL"
+}
+
+func ubfiz3(x uint16) uint64 {
+ return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
+}
+
+func ubfiz4(x uint8) uint64 {
+ return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]8",-"LSL"
+}
+
+func ubfiz5(x uint8) uint64 {
+ return uint64(x) << 60 // arm64:"UBFIZ\t[$]60, R[0-9]+, [$]4",-"LSL"
+}
+
+func ubfiz6(x uint32) uint64 {
+ return uint64(x << 30) // arm64:"UBFIZ\t[$]30, R[0-9]+, [$]2",
+}
+
+func ubfiz7(x uint16) uint64 {
+ return uint64(x << 10) // arm64:"UBFIZ\t[$]10, R[0-9]+, [$]6",
+}
+
+func ubfiz8(x uint8) uint64 {
+ return uint64(x << 7) // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]1",
+}
+
+// merge ANDconst into ubfiz.
+func ubfiz9(x uint64) uint64 {
+ // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND"
+ // s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND"
+ return (x & 0xfff) << 3
+}
+
+func ubfiz10(x uint64) uint64 {
+ // arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND"
+ // s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND"
+ return (x << 4) & 0xfff0
+}
+
+// ubfiz combinations
+func ubfiz11(x uint32) uint32 {
+ // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"LSR"
+ return (x << 4) >> 3
+}
+
+func ubfiz12(x uint64) uint64 {
+ // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND"
+ return ((x & 0xfffff) << 4) >> 3
+}
+
+func ubfiz13(x uint64) uint64 {
+ // arm64:"UBFIZ\t[$]5, R[0-9]+, [$]13",-"LSL",-"LSR",-"AND"
+ return ((x << 3) & 0xffff) << 2
+}
+
+func ubfiz14(x uint64) uint64 {
+ // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND"
+ return ((x << 5) & (0xfff << 5)) << 2
+}
+
+// ubfx
+// merge shifts into ubfx: (x<<lc)>>rc && lc<rc
+func ubfx1(x uint64) uint64 {
+ // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD"
+ return (x << 1) >> 2
+}
+
+// merge shift and zero-extension into ubfx.
+func ubfx2(x uint32) uint64 {
+ return uint64(x >> 15) // arm64:"UBFX\t[$]15, R[0-9]+, [$]17",-"LSR"
+}
+
+func ubfx3(x uint16) uint64 {
+ return uint64(x >> 9) // arm64:"UBFX\t[$]9, R[0-9]+, [$]7",-"LSR"
+}
+
+func ubfx4(x uint8) uint64 {
+ return uint64(x >> 3) // arm64:"UBFX\t[$]3, R[0-9]+, [$]5",-"LSR"
+}
+
+func ubfx5(x uint32) uint64 {
+ return uint64(x) >> 30 // arm64:"UBFX\t[$]30, R[0-9]+, [$]2"
+}
+
+func ubfx6(x uint16) uint64 {
+ return uint64(x) >> 10 // arm64:"UBFX\t[$]10, R[0-9]+, [$]6"
+}
+
+func ubfx7(x uint8) uint64 {
+ return uint64(x) >> 3 // arm64:"UBFX\t[$]3, R[0-9]+, [$]5"
+}
+
+// merge ANDconst into ubfx.
+func ubfx8(x uint64) uint64 {
+ // arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND"
+ return (x >> 25) & 1023
+}
+
+func ubfx9(x uint64) uint64 {
+ // arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND"
+ return (x & 0x0ff0) >> 4
+}
+
+// ubfx combinations.
+func ubfx10(x uint32) uint32 {
+ // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
+ return (x << 1) >> 2
+}
+
+func ubfx11(x uint64) uint64 {
+ // arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND"
+ return ((x << 1) >> 2) & 0xfff
+}
+
+func ubfx12(x uint64) uint64 {
+ // arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND"
+ return ((x >> 3) & 0xfff) >> 1
+}
+
+func ubfx13(x uint64) uint64 {
+ // arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD"
+ return ((x >> 2) << 5) >> 8
+}
+
+func ubfx14(x uint64) uint64 {
+ // arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND"
+ return ((x & 0xfffff) << 3) >> 4
+}
+
+// merge ubfx and zero-extension into ubfx.
+func ubfx15(x uint64) bool {
+ midr := x + 10
+ part_num := uint16((midr >> 4) & 0xfff)
+ if part_num == 0xd0c { // arm64:"UBFX\t[$]4, R[0-9]+, [$]12",-"MOVHU\tR[0-9]+, R[0-9]+"
+ return true
+ }
+ return false
+}
+
+// merge ANDconst and ubfx into ubfx
+func ubfx16(x uint64) uint64 {
+ // arm64:"UBFX\t[$]4, R[0-9]+, [$]6",-"AND\t[$]63"
+ return ((x >> 3) & 0xfff) >> 1 & 0x3f
+}
+
+// Check that we don't emit comparisons for constant shifts.
+//
+//go:nosplit
+func shift_no_cmp(x int) int {
+ // arm64:`LSL\t[$]17`,-`CMP`
+ // mips64:`SLLV\t[$]17`,-`SGT`
+ return x << 17
+}
+
+func rev16(c uint64) (uint64, uint64, uint64) {
+ // arm64:`REV16`,-`AND`,-`LSR`,-`AND`,-`ORR\tR[0-9]+<<8`
+ b1 := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
+ // arm64:-`ADD\tR[0-9]+<<8`
+ b2 := ((c & 0xff00ff00ff00ff00) >> 8) + ((c & 0x00ff00ff00ff00ff) << 8)
+ // arm64:-`EOR\tR[0-9]+<<8`
+ b3 := ((c & 0xff00ff00ff00ff00) >> 8) ^ ((c & 0x00ff00ff00ff00ff) << 8)
+ return b1, b2, b3
+}
+
+func rev16w(c uint32) (uint32, uint32, uint32) {
+ // arm64:`REV16W`,-`AND`,-`UBFX`,-`AND`,-`ORR\tR[0-9]+<<8`
+ b1 := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
+ // arm64:-`ADD\tR[0-9]+<<8`
+ b2 := ((c & 0xff00ff00) >> 8) + ((c & 0x00ff00ff) << 8)
+ // arm64:-`EOR\tR[0-9]+<<8`
+ b3 := ((c & 0xff00ff00) >> 8) ^ ((c & 0x00ff00ff) << 8)
+ return b1, b2, b3
+}
+
+func shift(x uint32, y uint16, z uint8) uint64 {
+ // arm64:-`MOVWU`,-`LSR\t[$]32`
+ a := uint64(x) >> 32
+ // arm64:-`MOVHU`
+ b := uint64(y) >> 16
+ // arm64:-`MOVBU`
+ c := uint64(z) >> 8
+ // arm64:`MOVD\tZR`,-`ADD\tR[0-9]+>>16`,-`ADD\tR[0-9]+>>8`,
+ return a + b + c
+}
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
new file mode 100644
index 0000000..4b6c8b9
--- /dev/null
+++ b/test/codegen/bits.go
@@ -0,0 +1,420 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+/************************************
+ * 64-bit instructions
+ ************************************/
+
+func bitcheck64_constleft(a uint64) (n int) {
+ // amd64:"BTQ\t[$]63"
+ if a&(1<<63) != 0 {
+ return 1
+ }
+ // amd64:"BTQ\t[$]60"
+ if a&(1<<60) != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]0"
+ if a&(1<<0) != 0 {
+ return 1
+ }
+ return 0
+}
+
+func bitcheck64_constright(a [8]uint64) (n int) {
+ // amd64:"BTQ\t[$]63"
+ if (a[0]>>63)&1 != 0 {
+ return 1
+ }
+ // amd64:"BTQ\t[$]63"
+ if a[1]>>63 != 0 {
+ return 1
+ }
+ // amd64:"BTQ\t[$]63"
+ if a[2]>>63 == 0 {
+ return 1
+ }
+ // amd64:"BTQ\t[$]60"
+ if (a[3]>>60)&1 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]1"
+ if (a[4]>>1)&1 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]0"
+ if (a[5]>>0)&1 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]7"
+ if (a[6]>>5)&4 == 0 {
+ return 1
+ }
+ return 0
+}
+
+func bitcheck64_var(a, b uint64) (n int) {
+ // amd64:"BTQ"
+ if a&(1<<(b&63)) != 0 {
+ return 1
+ }
+ // amd64:"BTQ",-"BT.\t[$]0"
+ if (b>>(a&63))&1 != 0 {
+ return 1
+ }
+ return 0
+}
+
+func bitcheck64_mask(a uint64) (n int) {
+ // amd64:"BTQ\t[$]63"
+ if a&0x8000000000000000 != 0 {
+ return 1
+ }
+ // amd64:"BTQ\t[$]59"
+ if a&0x800000000000000 != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]0"
+ if a&0x1 != 0 {
+ return 1
+ }
+ return 0
+}
+
+func biton64(a, b uint64) (n uint64) {
+ // amd64:"BTSQ"
+ n += b | (1 << (a & 63))
+
+ // amd64:"BTSQ\t[$]63"
+ n += a | (1 << 63)
+
+ // amd64:"BTSQ\t[$]60"
+ n += a | (1 << 60)
+
+ // amd64:"ORQ\t[$]1"
+ n += a | (1 << 0)
+
+ return n
+}
+
+func bitoff64(a, b uint64) (n uint64) {
+ // amd64:"BTRQ"
+ n += b &^ (1 << (a & 63))
+
+ // amd64:"BTRQ\t[$]63"
+ n += a &^ (1 << 63)
+
+ // amd64:"BTRQ\t[$]60"
+ n += a &^ (1 << 60)
+
+ // amd64:"ANDQ\t[$]-2"
+ n += a &^ (1 << 0)
+
+ return n
+}
+
+func bitcompl64(a, b uint64) (n uint64) {
+ // amd64:"BTCQ"
+ n += b ^ (1 << (a & 63))
+
+ // amd64:"BTCQ\t[$]63"
+ n += a ^ (1 << 63)
+
+ // amd64:"BTCQ\t[$]60"
+ n += a ^ (1 << 60)
+
+ // amd64:"XORQ\t[$]1"
+ n += a ^ (1 << 0)
+
+ return n
+}
+
+/************************************
+ * 32-bit instructions
+ ************************************/
+
+func bitcheck32_constleft(a uint32) (n int) {
+ // amd64:"BTL\t[$]31"
+ if a&(1<<31) != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]28"
+ if a&(1<<28) != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]0"
+ if a&(1<<0) != 0 {
+ return 1
+ }
+ return 0
+}
+
+func bitcheck32_constright(a [8]uint32) (n int) {
+ // amd64:"BTL\t[$]31"
+ if (a[0]>>31)&1 != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]31"
+ if a[1]>>31 != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]31"
+ if a[2]>>31 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]28"
+ if (a[3]>>28)&1 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]1"
+ if (a[4]>>1)&1 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]0"
+ if (a[5]>>0)&1 == 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]7"
+ if (a[6]>>5)&4 == 0 {
+ return 1
+ }
+ return 0
+}
+
+func bitcheck32_var(a, b uint32) (n int) {
+ // amd64:"BTL"
+ if a&(1<<(b&31)) != 0 {
+ return 1
+ }
+ // amd64:"BTL",-"BT.\t[$]0"
+ if (b>>(a&31))&1 != 0 {
+ return 1
+ }
+ return 0
+}
+
+func bitcheck32_mask(a uint32) (n int) {
+ // amd64:"BTL\t[$]31"
+ if a&0x80000000 != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]27"
+ if a&0x8000000 != 0 {
+ return 1
+ }
+ // amd64:"BTL\t[$]0"
+ if a&0x1 != 0 {
+ return 1
+ }
+ return 0
+}
+
+func biton32(a, b uint32) (n uint32) {
+ // amd64:"BTSL"
+ n += b | (1 << (a & 31))
+
+ // amd64:"ORL\t[$]-2147483648"
+ n += a | (1 << 31)
+
+ // amd64:"ORL\t[$]268435456"
+ n += a | (1 << 28)
+
+ // amd64:"ORL\t[$]1"
+ n += a | (1 << 0)
+
+ return n
+}
+
+func bitoff32(a, b uint32) (n uint32) {
+ // amd64:"BTRL"
+ n += b &^ (1 << (a & 31))
+
+ // amd64:"ANDL\t[$]2147483647"
+ n += a &^ (1 << 31)
+
+ // amd64:"ANDL\t[$]-268435457"
+ n += a &^ (1 << 28)
+
+ // amd64:"ANDL\t[$]-2"
+ n += a &^ (1 << 0)
+
+ return n
+}
+
+func bitcompl32(a, b uint32) (n uint32) {
+ // amd64:"BTCL"
+ n += b ^ (1 << (a & 31))
+
+ // amd64:"XORL\t[$]-2147483648"
+ n += a ^ (1 << 31)
+
+ // amd64:"XORL\t[$]268435456"
+ n += a ^ (1 << 28)
+
+ // amd64:"XORL\t[$]1"
+ n += a ^ (1 << 0)
+
+ return n
+}
+
+// check direct operation on memory with constant and shifted constant sources
+func bitOpOnMem(a []uint32, b, c, d uint32) {
+ // amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)`
+ a[0] &= 200
+ // amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)`
+ a[1] |= 220
+ // amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)`
+ a[2] ^= 240
+}
+
+func bitcheckMostNegative(b uint8) bool {
+ // amd64:"TESTB"
+ return b&0x80 == 0x80
+}
+
+// Check AND masking on arm64 (Issue #19857)
+
+func and_mask_1(a uint64) uint64 {
+ // arm64:`AND\t`
+ return a & ((1 << 63) - 1)
+}
+
+func and_mask_2(a uint64) uint64 {
+ // arm64:`AND\t`
+ return a & (1 << 63)
+}
+
+func and_mask_3(a, b uint32) (uint32, uint32) {
+ // arm/7:`BIC`,-`AND`
+ a &= 0xffffaaaa
+ // arm/7:`BFC`,-`AND`,-`BIC`
+ b &= 0xffc003ff
+ return a, b
+}
+
+// Check generation of arm64 BIC/EON/ORN instructions
+
+func op_bic(x, y uint32) uint32 {
+ // arm64:`BIC\t`,-`AND`
+ return x &^ y
+}
+
+func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 {
+ // arm64:`EON\t`,-`EOR`,-`MVN`
+ a[0] = x ^ (y ^ 0xffffffff)
+
+ // arm64:`EON\t`,-`EOR`,-`MVN`
+ a[1] = ^(y ^ z)
+
+ // arm64:`EON\t`,-`XOR`
+ a[2] = x ^ ^z
+
+ // arm64:`EON\t`,-`EOR`,-`MVN`
+ return n ^ (m ^ 0xffffffffffffffff)
+}
+
+func op_orn(x, y uint32) uint32 {
+ // arm64:`ORN\t`,-`ORR`
+ return x | ^y
+}
+
+// check bitsets
+func bitSetPowerOf2Test(x int) bool {
+ // amd64:"BTL\t[$]3"
+ return x&8 == 8
+}
+
+func bitSetTest(x int) bool {
+ // amd64:"ANDL\t[$]9, AX"
+ // amd64:"CMPQ\tAX, [$]9"
+ return x&9 == 9
+}
+
+// mask contiguous one bits
+func cont1Mask64U(x uint64) uint64 {
+ // s390x:"RISBGZ\t[$]16, [$]47, [$]0,"
+ return x & 0x0000ffffffff0000
+}
+
+// mask contiguous zero bits
+func cont0Mask64U(x uint64) uint64 {
+ // s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
+ return x & 0xffff00000000ffff
+}
+
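+// Check that testing a single bit of a bitmap held in a slice compiles
+// to a BT instruction on the loaded word, rather than materializing the
+// mask with a separate shift (see issue 44228).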
+func issue44228a(a []int64, i int) bool {
+ // amd64: "BTQ", -"SHL"
+ return a[i>>6]&(1<<(i&63)) != 0
+}
+func issue44228b(a []int32, i int) bool {
+ // amd64: "BTL", -"SHL"
+ return a[i>>5]&(1<<(i&31)) != 0
+}
+
+func issue48467(x, y uint64) uint64 {
+ // arm64: -"NEG"
+ d, borrow := bits.Sub64(x, y, 0)
+ return x - d&(-borrow)
+}
+
+func foldConst(x, y uint64) uint64 {
+ // arm64: "ADDS\t[$]7",-"MOVD\t[$]7"
+ d, b := bits.Add64(x, 7, 0)
+ return b & d
+}
+
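+// A constant that does not fit in an arm64 ADD immediate must be
+// materialized into a register (MOVD) before the addition.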
+func foldConstOutOfRange(a uint64) uint64 {
+ // arm64: "MOVD\t[$]19088744",-"ADD\t[$]19088744"
+ return a + 0x1234568
+}
+
+// Verify sign-extended values are not zero-extended under a bit mask (#61297)
+func signextendAndMask8to64(a int8) (s, z uint64) {
+ // ppc64x: "MOVB", "ANDCC\t[$]1015,"
+ s = uint64(a) & 0x3F7
+ // ppc64x: -"MOVB", "ANDCC\t[$]247,"
+ z = uint64(uint8(a)) & 0x3F7
+ return
+}
+
+// Verify zero-extended values are not sign-extended under a bit mask (#61297)
+func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
+ // ppc64x: -"MOVB\t", -"ANDCC", "MOVBZ"
+ x = uint64(a) & 0xFF
+ // ppc64x: -"MOVH\t", -"ANDCC", "MOVHZ"
+ y = uint64(b) & 0xFFFF
+ return
+}
+
+// Verify rotate and mask instructions, and further simplified instructions for small types
+func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
+ // ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R"
+ io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
+ // ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R"
+ io64[1] = io64[1] & 0x0000FFFFFFFFFFFF
+ // ppc64x: -"SRD", -"AND", "RLDICL\t[$]60, R[0-9]*, [$]16, R"
+ io64[2] = (io64[2] >> 4) & 0x0000FFFFFFFFFFFF
+ // ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R"
+ io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF
+
+ // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R"
+ io32[0] = io32[0] & 0x0FFFF000
+ // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R"
+ io32[1] = io32[1] & 0xF0000FFF
+ // ppc64x: -"RLWNM", "MOVD", "AND"
+ io32[2] = io32[2] & 0xFFFF0002
+
+ var bigc uint32 = 0x12345678
+ // ppc64x: "ANDCC\t[$]22136"
+ io16[0] = io16[0] & uint16(bigc)
+
+ // ppc64x: "ANDCC\t[$]120"
+ io8[0] = io8[0] & uint8(bigc)
+}
diff --git a/test/codegen/bmi.go b/test/codegen/bmi.go
new file mode 100644
index 0000000..aa61b03
--- /dev/null
+++ b/test/codegen/bmi.go
@@ -0,0 +1,209 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func andn64(x, y int64) int64 {
+ // amd64/v3:"ANDNQ"
+ return x &^ y
+}
+
+func andn32(x, y int32) int32 {
+ // amd64/v3:"ANDNL"
+ return x &^ y
+}
+
+func blsi64(x int64) int64 {
+ // amd64/v3:"BLSIQ"
+ return x & -x
+}
+
+func blsi32(x int32) int32 {
+ // amd64/v3:"BLSIL"
+ return x & -x
+}
+
+func blsmsk64(x int64) int64 {
+ // amd64/v3:"BLSMSKQ"
+ return x ^ (x - 1)
+}
+
+func blsmsk32(x int32) int32 {
+ // amd64/v3:"BLSMSKL"
+ return x ^ (x - 1)
+}
+
+func blsr64(x int64) int64 {
+ // amd64/v3:"BLSRQ"
+ return x & (x - 1)
+}
+
+func blsr32(x int32) int32 {
+ // amd64/v3:"BLSRL"
+ return x & (x - 1)
+}
+
+func isPowerOfTwo64(x int64) bool {
+ // amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+ return blsr64(x) == 0
+}
+
+func isPowerOfTwo32(x int32) bool {
+ // amd64/v3:"BLSRL",-"TESTL",-"CALL"
+ return blsr32(x) == 0
+}
+
+func isPowerOfTwoSelect64(x, a, b int64) int64 {
+ var r int64
+ // amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+ if isPowerOfTwo64(x) {
+ r = a
+ } else {
+ r = b
+ }
+ // amd64/v3:"CMOVQEQ",-"TESTQ",-"CALL"
+ return r * 2 // force return blocks joining
+}
+
+func isPowerOfTwoSelect32(x, a, b int32) int32 {
+ var r int32
+ // amd64/v3:"BLSRL",-"TESTL",-"CALL"
+ if isPowerOfTwo32(x) {
+ r = a
+ } else {
+ r = b
+ }
+ // amd64/v3:"CMOVLEQ",-"TESTL",-"CALL"
+ return r * 2 // force return blocks joining
+}
+
+func isPowerOfTwoBranch64(x int64, a func(bool), b func(string)) {
+ // amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+ if isPowerOfTwo64(x) {
+ a(true)
+ } else {
+ b("false")
+ }
+}
+
+func isPowerOfTwoBranch32(x int32, a func(bool), b func(string)) {
+ // amd64/v3:"BLSRL",-"TESTL",-"CALL"
+ if isPowerOfTwo32(x) {
+ a(true)
+ } else {
+ b("false")
+ }
+}
+
+func isNotPowerOfTwo64(x int64) bool {
+ // amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+ return blsr64(x) != 0
+}
+
+func isNotPowerOfTwo32(x int32) bool {
+ // amd64/v3:"BLSRL",-"TESTL",-"CALL"
+ return blsr32(x) != 0
+}
+
+func isNotPowerOfTwoSelect64(x, a, b int64) int64 {
+ var r int64
+ // amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+ if isNotPowerOfTwo64(x) {
+ r = a
+ } else {
+ r = b
+ }
+ // amd64/v3:"CMOVQNE",-"TESTQ",-"CALL"
+ return r * 2 // force return blocks joining
+}
+
+func isNotPowerOfTwoSelect32(x, a, b int32) int32 {
+ var r int32
+ // amd64/v3:"BLSRL",-"TESTL",-"CALL"
+ if isNotPowerOfTwo32(x) {
+ r = a
+ } else {
+ r = b
+ }
+ // amd64/v3:"CMOVLNE",-"TESTL",-"CALL"
+ return r * 2 // force return blocks joining
+}
+
+func isNotPowerOfTwoBranch64(x int64, a func(bool), b func(string)) {
+ // amd64/v3:"BLSRQ",-"TESTQ",-"CALL"
+ if isNotPowerOfTwo64(x) {
+ a(true)
+ } else {
+ b("false")
+ }
+}
+
+func isNotPowerOfTwoBranch32(x int32, a func(bool), b func(string)) {
+ // amd64/v3:"BLSRL",-"TESTL",-"CALL"
+ if isNotPowerOfTwo32(x) {
+ a(true)
+ } else {
+ b("false")
+ }
+}
+
+func sarx64(x, y int64) int64 {
+ // amd64/v3:"SARXQ"
+ return x >> y
+}
+
+func sarx32(x, y int32) int32 {
+ // amd64/v3:"SARXL"
+ return x >> y
+}
+
+func sarx64_load(x []int64, i int) int64 {
+ // amd64/v3: `SARXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s := x[i] >> (i & 63)
+ // amd64/v3: `SARXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s = x[i+1] >> (s & 63)
+ return s
+}
+
+func sarx32_load(x []int32, i int) int32 {
+ // amd64/v3: `SARXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s := x[i] >> (i & 63)
+ // amd64/v3: `SARXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s = x[i+1] >> (s & 63)
+ return s
+}
+
+func shlrx64(x, y uint64) uint64 {
+ // amd64/v3:"SHRXQ"
+ s := x >> y
+ // amd64/v3:"SHLXQ"
+ s = s << y
+ return s
+}
+
+func shlrx32(x, y uint32) uint32 {
+ // amd64/v3:"SHRXL"
+ s := x >> y
+ // amd64/v3:"SHLXL"
+ s = s << y
+ return s
+}
+
+func shlrx64_load(x []uint64, i int, s uint64) uint64 {
+ // amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s = x[i] >> i
+ // amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s = x[i+1] << s
+ return s
+}
+
+func shlrx32_load(x []uint32, i int, s uint32) uint32 {
+ // amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s = x[i] >> i
+ // amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s = x[i+1] << s
+ return s
+}
diff --git a/test/codegen/bool.go b/test/codegen/bool.go
new file mode 100644
index 0000000..990a9ed
--- /dev/null
+++ b/test/codegen/bool.go
@@ -0,0 +1,276 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import (
+ "math/bits"
+)
+
+// This file contains codegen tests related to boolean simplifications/optimizations.
+
+func convertNeq0B(x uint8, c bool) bool {
+ // amd64:"ANDL\t[$]1",-"SETNE"
+ // ppc64x:"RLDICL",-"CMPW",-"ISEL"
+ b := x&1 != 0
+ return c && b
+}
+
+func convertNeq0W(x uint16, c bool) bool {
+ // amd64:"ANDL\t[$]1",-"SETNE"
+ // ppc64x:"RLDICL",-"CMPW",-"ISEL"
+ b := x&1 != 0
+ return c && b
+}
+
+func convertNeq0L(x uint32, c bool) bool {
+ // amd64:"ANDL\t[$]1",-"SETB"
+ // ppc64x:"RLDICL",-"CMPW",-"ISEL"
+ b := x&1 != 0
+ return c && b
+}
+
+func convertNeq0Q(x uint64, c bool) bool {
+ // amd64:"ANDL\t[$]1",-"SETB"
+ // ppc64x:"RLDICL",-"CMP",-"ISEL"
+ b := x&1 != 0
+ return c && b
+}
+
+func convertNeqBool32(x uint32) bool {
+ // ppc64x:"RLDICL",-"CMPW",-"ISEL"
+ return x&1 != 0
+}
+
+func convertEqBool32(x uint32) bool {
+ // ppc64x:"RLDICL",-"CMPW","XOR",-"ISEL"
+ return x&1 == 0
+}
+
+func convertNeqBool64(x uint64) bool {
+ // ppc64x:"RLDICL",-"CMP",-"ISEL"
+ return x&1 != 0
+}
+
+func convertEqBool64(x uint64) bool {
+ // ppc64x:"RLDICL","XOR",-"CMP",-"ISEL"
+ return x&1 == 0
+}
+
+func TestSetEq64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBC\tCR0EQ",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0EQ"
+ // ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0EQ"
+ b := x == y
+ return b
+}
+func TestSetNeq64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBCR\tCR0EQ",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0EQ"
+ // ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0EQ"
+ b := x != y
+ return b
+}
+func TestSetLt64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBC\tCR0GT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0GT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0GT"
+ b := x < y
+ return b
+}
+func TestSetLe64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0LT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0LT"
+ b := x <= y
+ return b
+}
+func TestSetGt64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0LT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0LT"
+ b := x > y
+ return b
+}
+func TestSetGe64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBCR\tCR0GT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0GT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0GT"
+ b := x >= y
+ return b
+}
+func TestSetLtFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+ b := x < y
+ return b
+}
+func TestSetLeFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT","SETBC\tCR0EQ","OR",-"ISEL",-"ISEL"
+ // ppc64x/power9:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+ // ppc64x/power8:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+ b := x <= y
+ return b
+}
+func TestSetGtFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+ b := x > y
+ return b
+}
+func TestSetGeFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT","SETBC\tCR0EQ","OR",-"ISEL",-"ISEL"
+ // ppc64x/power9:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+ // ppc64x/power8:"ISEL","ISEL",-"SETBC\tCR0LT",-"SETBC\tCR0EQ","OR"
+ b := x >= y
+ return b
+}
+func TestSetInvEq64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBCR\tCR0EQ",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0EQ"
+ // ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0EQ"
+ b := !(x == y)
+ return b
+}
+func TestSetInvNeq64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBC\tCR0EQ",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0EQ"
+ // ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0EQ"
+ b := !(x != y)
+ return b
+}
+func TestSetInvLt64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBCR\tCR0GT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0GT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0GT"
+ b := !(x < y)
+ return b
+}
+func TestSetInvLe64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0LT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0LT"
+ b := !(x <= y)
+ return b
+}
+func TestSetInvGt64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBCR\tCR0LT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBCR\tCR0LT"
+ b := !(x > y)
+ return b
+}
+func TestSetInvGe64(x uint64, y uint64) bool {
+ // ppc64x/power10:"SETBC\tCR0GT",-"ISEL"
+ // ppc64x/power9:"CMP","ISEL",-"SETBC\tCR0GT"
+ // ppc64x/power8:"CMP","ISEL",-"SETBC\tCR0GT"
+ b := !(x >= y)
+ return b
+}
+
+func TestSetInvEqFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBCR\tCR0EQ",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBCR\tCR0EQ"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBCR\tCR0EQ"
+ b := !(x == y)
+ return b
+}
+func TestSetInvNeqFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0EQ",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0EQ"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0EQ"
+ b := !(x != y)
+ return b
+}
+func TestSetInvLtFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBCR\tCR0LT"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBCR\tCR0LT"
+ b := !(x < y)
+ return b
+}
+func TestSetInvLeFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+ b := !(x <= y)
+ return b
+}
+func TestSetInvGtFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBCR\tCR0LT",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBCR\tCR0LT"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBCR\tCR0LT"
+ b := !(x > y)
+ return b
+}
+func TestSetInvGeFp64(x float64, y float64) bool {
+ // ppc64x/power10:"SETBC\tCR0LT",-"ISEL"
+ // ppc64x/power9:"FCMP","ISEL",-"SETBC\tCR0LT"
+ // ppc64x/power8:"FCMP","ISEL",-"SETBC\tCR0LT"
+ b := !(x >= y)
+ return b
+}
+func TestLogicalCompareZero(x *[64]uint64) {
+ // ppc64x:"ANDCC",^"AND"
+ b := x[0] & 3
+ if b != 0 {
+ x[0] = b
+ }
+ // ppc64x:"ANDCC",^"AND"
+ b = x[1] & x[2]
+ if b != 0 {
+ x[1] = b
+ }
+ // ppc64x:"ANDNCC",^"ANDN"
+ b = x[1] &^ x[2]
+ if b != 0 {
+ x[1] = b
+ }
+ // ppc64x:"ORCC",^"OR"
+ b = x[3] | x[4]
+ if b != 0 {
+ x[3] = b
+ }
+ // ppc64x:"SUBCC",^"SUB"
+ b = x[5] - x[6]
+ if b != 0 {
+ x[5] = b
+ }
+ // ppc64x:"NORCC",^"NOR"
+ b = ^(x[5] | x[6])
+ if b != 0 {
+ x[5] = b
+ }
+ // ppc64x:"XORCC",^"XOR"
+ b = x[7] ^ x[8]
+ if b != 0 {
+ x[7] = b
+ }
+ // ppc64x:"ADDCC",^"ADD"
+ b = x[9] + x[10]
+ if b != 0 {
+ x[9] = b
+ }
+ // ppc64x:"NEGCC",^"NEG"
+ b = -x[11]
+ if b != 0 {
+ x[11] = b
+ }
+ // ppc64x:"CNTLZDCC",^"CNTLZD"
+ b = uint64(bits.LeadingZeros64(x[12]))
+ if b != 0 {
+ x[12] = b
+ }
+
+ // ppc64x:"ADDCCC\t[$]4,"
+ c := int64(x[12]) + 4
+ if c <= 0 {
+ x[12] = uint64(c)
+ }
+}
diff --git a/test/codegen/clobberdead.go b/test/codegen/clobberdead.go
new file mode 100644
index 0000000..13d2efb
--- /dev/null
+++ b/test/codegen/clobberdead.go
@@ -0,0 +1,35 @@
+// asmcheck -gcflags=-clobberdead
+
+//go:build amd64 || arm64
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type T [2]*int // contain pointer, not SSA-able (so locals are not registerized)
+
+var p1, p2, p3 T
+
+func F() {
+ // 3735936685 is 0xdeaddead. On ARM64 R27 is REGTMP.
+ // clobber x, y at entry. not clobber z (stack object).
+ // amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, `MOVL\t\$3735936685, command-line-arguments\.y`, -`MOVL\t\$3735936685, command-line-arguments\.z`
+ // arm64:`MOVW\tR27, command-line-arguments\.x`, `MOVW\tR27, command-line-arguments\.y`, -`MOVW\tR27, command-line-arguments\.z`
+ x, y, z := p1, p2, p3
+ addrTaken(&z)
+ // x is dead at the call (the value of x is loaded before the CALL), y is not
+ // amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, -`MOVL\t\$3735936685, command-line-arguments\.y`
+ // arm64:`MOVW\tR27, command-line-arguments\.x`, -`MOVW\tR27, command-line-arguments\.y`
+ use(x)
+ // amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, `MOVL\t\$3735936685, command-line-arguments\.y`
+ // arm64:`MOVW\tR27, command-line-arguments\.x`, `MOVW\tR27, command-line-arguments\.y`
+ use(y)
+}
+
+//go:noinline
+func use(T) {}
+
+//go:noinline
+func addrTaken(*T) {}
diff --git a/test/codegen/clobberdeadreg.go b/test/codegen/clobberdeadreg.go
new file mode 100644
index 0000000..39c4a74
--- /dev/null
+++ b/test/codegen/clobberdeadreg.go
@@ -0,0 +1,33 @@
+// asmcheck -gcflags=-clobberdeadreg
+
+//go:build amd64
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type S struct {
+ a, b, c, d, e, f int
+}
+
+func F(a, b, c int, d S) {
+ // -2401018187971961171 is 0xdeaddeaddeaddead
+ // amd64:`MOVQ\t\$-2401018187971961171, AX`, `MOVQ\t\$-2401018187971961171, BX`, `MOVQ\t\$-2401018187971961171, CX`
+ // amd64:`MOVQ\t\$-2401018187971961171, DX`, `MOVQ\t\$-2401018187971961171, SI`, `MOVQ\t\$-2401018187971961171, DI`
+ // amd64:`MOVQ\t\$-2401018187971961171, R8`, `MOVQ\t\$-2401018187971961171, R9`, `MOVQ\t\$-2401018187971961171, R10`
+ // amd64:`MOVQ\t\$-2401018187971961171, R11`, `MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`
+ // amd64:-`MOVQ\t\$-2401018187971961171, BP` // frame pointer is not clobbered
+ StackArgsCall([10]int{a, b, c})
+ // amd64:`MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`, `MOVQ\t\$-2401018187971961171, DX`
+ // amd64:-`MOVQ\t\$-2401018187971961171, AX`, -`MOVQ\t\$-2401018187971961171, R11` // register args are not clobbered
+ RegArgsCall(a, b, c, d)
+}
+
+//go:noinline
+func StackArgsCall([10]int) {}
+
+//go:noinline
+//go:registerparams
+func RegArgsCall(int, int, int, S) {}
diff --git a/test/codegen/compare_and_branch.go b/test/codegen/compare_and_branch.go
new file mode 100644
index 0000000..c121f1d
--- /dev/null
+++ b/test/codegen/compare_and_branch.go
@@ -0,0 +1,244 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+//go:noinline
+func dummy() {}
+
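+// In the expected s390x patterns below, the first immediate of the
+// compare-and-branch instructions (CGRJ, CGIJ, CLGIJ, CIJ, CLIJ, ...) is the
+// branch condition mask: 8 = equal, 4 = less, 2 = greater; combinations such
+// as 12 (<=), 10 (>=) and 6 (!=) follow from those bits.
+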
+// Signed 64-bit compare-and-branch.
+func si64(x, y chan int64) {
+ // s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+ for <-x < <-y {
+ dummy()
+ }
+
+ // s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, "
+ for <-x == <-y {
+ dummy()
+ }
+}
+
+// Signed 64-bit compare-and-branch with 8-bit immediate.
+func si64x8(doNotOptimize int64) {
+ // take in doNotOptimize as an argument to avoid the loops being rewritten to count down
+ // s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, "
+ for i := doNotOptimize; i < 128; i++ {
+ dummy()
+ }
+
+ // s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, "
+ for i := doNotOptimize; i > -129; i-- {
+ dummy()
+ }
+
+ // s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, "
+ for i := doNotOptimize; i >= 128; i++ {
+ dummy()
+ }
+
+ // s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, "
+ for i := doNotOptimize; i <= -129; i-- {
+ dummy()
+ }
+}
+
+// Unsigned 64-bit compare-and-branch.
+func ui64(x, y chan uint64) {
+ // s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+ for <-x > <-y {
+ dummy()
+ }
+
+ // s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, "
+ for <-x != <-y {
+ dummy()
+ }
+}
+
+// Unsigned 64-bit comparison with 8-bit immediate.
+func ui64x8() {
+ // s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, "
+ for i := uint64(0); i < 128; i++ {
+ dummy()
+ }
+
+ // s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, "
+ for i := uint64(0); i < 256; i++ {
+ dummy()
+ }
+
+ // s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, "
+ for i := uint64(257); i >= 256; i-- {
+ dummy()
+ }
+
+ // s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, "
+ for i := uint64(1024); i > 0; i-- {
+ dummy()
+ }
+}
+
+// Signed 32-bit compare-and-branch.
+func si32(x, y chan int32) {
+ // s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+ for <-x < <-y {
+ dummy()
+ }
+
+ // s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, "
+ for <-x == <-y {
+ dummy()
+ }
+}
+
+// Signed 32-bit compare-and-branch with 8-bit immediate.
+func si32x8(doNotOptimize int32) {
+ // take in doNotOptimize as an argument to avoid the loops being rewritten to count down
+ // s390x:"CIJ\t[$]12, R[0-9]+, [$]127, "
+ for i := doNotOptimize; i < 128; i++ {
+ dummy()
+ }
+
+ // s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, "
+ for i := doNotOptimize; i > -129; i-- {
+ dummy()
+ }
+
+ // s390x:"CIJ\t[$]2, R[0-9]+, [$]127, "
+ for i := doNotOptimize; i >= 128; i++ {
+ dummy()
+ }
+
+ // s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, "
+ for i := doNotOptimize; i <= -129; i-- {
+ dummy()
+ }
+}
+
+// Unsigned 32-bit compare-and-branch.
+func ui32(x, y chan uint32) {
+ // s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
+ for <-x > <-y {
+ dummy()
+ }
+
+ // s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, "
+ for <-x != <-y {
+ dummy()
+ }
+}
+
+// Unsigned 32-bit comparison with 8-bit immediate.
+func ui32x8() {
+ // s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, "
+ for i := uint32(0); i < 128; i++ {
+ dummy()
+ }
+
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, "
+ for i := uint32(0); i < 256; i++ {
+ dummy()
+ }
+
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, "
+ for i := uint32(257); i >= 256; i-- {
+ dummy()
+ }
+
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, "
+ for i := uint32(1024); i > 0; i-- {
+ dummy()
+ }
+}
+
+// Signed 64-bit comparison with unsigned 8-bit immediate.
+func si64xu8(x chan int64) {
+ // s390x:"CLGIJ\t[$]8, R[0-9]+, [$]128, "
+ for <-x == 128 {
+ dummy()
+ }
+
+ // s390x:"CLGIJ\t[$]6, R[0-9]+, [$]255, "
+ for <-x != 255 {
+ dummy()
+ }
+}
+
+// Signed 32-bit comparison with unsigned 8-bit immediate.
+func si32xu8(x chan int32) {
+ // s390x:"CLIJ\t[$]8, R[0-9]+, [$]255, "
+ for <-x == 255 {
+ dummy()
+ }
+
+ // s390x:"CLIJ\t[$]6, R[0-9]+, [$]128, "
+ for <-x != 128 {
+ dummy()
+ }
+}
+
+// Unsigned 64-bit comparison with signed 8-bit immediate.
+func ui64xu8(x chan uint64) {
+ // s390x:"CGIJ\t[$]8, R[0-9]+, [$]-1, "
+ for <-x == ^uint64(0) {
+ dummy()
+ }
+
+ // s390x:"CGIJ\t[$]6, R[0-9]+, [$]-128, "
+ for <-x != ^uint64(127) {
+ dummy()
+ }
+}
+
+// Unsigned 32-bit comparison with signed 8-bit immediate.
+func ui32xu8(x chan uint32) {
+ // s390x:"CIJ\t[$]8, R[0-9]+, [$]-128, "
+ for <-x == ^uint32(127) {
+ dummy()
+ }
+
+ // s390x:"CIJ\t[$]6, R[0-9]+, [$]-1, "
+ for <-x != ^uint32(0) {
+ dummy()
+ }
+}
+
+// Signed 64-bit comparison with 1/-1 converted to comparison with 0.
+func si64x0(x chan int64) {
+ // riscv64:"BGTZ"
+ for <-x >= 1 {
+ dummy()
+ }
+
+ // riscv64:"BLEZ"
+ for <-x < 1 {
+ dummy()
+ }
+
+ // riscv64:"BLTZ"
+ for <-x <= -1 {
+ dummy()
+ }
+
+ // riscv64:"BGEZ"
+ for <-x > -1 {
+ dummy()
+ }
+}
+
+// Unsigned 64-bit comparison with 1 converted to comparison with 0.
+func ui64x0(x chan uint64) {
+ // riscv64:"BNEZ"
+ for <-x >= 1 {
+ dummy()
+ }
+
+ // riscv64:"BEQZ"
+ for <-x < 1 {
+ dummy()
+ }
+}
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
new file mode 100644
index 0000000..4edf930
--- /dev/null
+++ b/test/codegen/comparisons.go
@@ -0,0 +1,803 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "unsafe"
+
+// This file contains code generation tests related to the comparison
+// operators.
+
+// -------------- //
+// Equality //
+// -------------- //
+
+// Check that comparisons to constant strings use 2/4/8 byte compares
+
+func CompareString1(s string) bool {
+ // amd64:`CMPW\t\(.*\), [$]`
+ // arm64:`MOVHU\t\(.*\), [R]`,`MOVD\t[$]`,`CMPW\tR`
+ // ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
+ // s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
+ return s == "xx"
+}
+
+func CompareString2(s string) bool {
+ // amd64:`CMPL\t\(.*\), [$]`
+ // arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
+ // ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
+ // s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
+ return s == "xxxx"
+}
+
+func CompareString3(s string) bool {
+ // amd64:`CMPQ\t\(.*\), [A-Z]`
+ // arm64:-`CMPW\t`
+ // ppc64x:-`CMPW\t`
+ // s390x:-`CMPW\t`
+ return s == "xxxxxxxx"
+}
+
+// Check that array comparisons use 2/4/8 byte compares
+
+func CompareArray1(a, b [2]byte) bool {
+ // amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // arm64:-`MOVBU\t`
+ // ppc64le:-`MOVBZ\t`
+ // s390x:-`MOVBZ\t`
+ return a == b
+}
+
+func CompareArray2(a, b [3]uint16) bool {
+ // amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ return a == b
+}
+
+func CompareArray3(a, b [3]int16) bool {
+ // amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ return a == b
+}
+
+func CompareArray4(a, b [12]int8) bool {
+ // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ return a == b
+}
+
+func CompareArray5(a, b [15]byte) bool {
+ // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ return a == b
+}
+
+// This was a TODO in mapaccess1_faststr
+func CompareArray6(a, b unsafe.Pointer) bool {
+ // amd64:`CMPL\t\(.*\), [A-Z]`
+ // arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]`
+ // ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]`
+ // s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]`
+ return *((*[4]byte)(a)) != *((*[4]byte)(b))
+}
+
+// Check that some structs generate 2/4/8 byte compares.
+
+type T1 struct {
+ a [8]byte
+}
+
+func CompareStruct1(s1, s2 T1) bool {
+ // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:-`CALL`
+ return s1 == s2
+}
+
+type T2 struct {
+ a [16]byte
+}
+
+func CompareStruct2(s1, s2 T2) bool {
+ // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:-`CALL`
+ return s1 == s2
+}
+
+// Assert that a memequal call is still generated when
+// inlining would increase binary size too much.
+
+type T3 struct {
+ a [24]byte
+}
+
+func CompareStruct3(s1, s2 T3) bool {
+ // amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:`CALL`
+ return s1 == s2
+}
+
+type T4 struct {
+ a [32]byte
+}
+
+func CompareStruct4(s1, s2 T4) bool {
+ // amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]`
+ // amd64:`CALL`
+ return s1 == s2
+}
+
+// -------------- //
+// Ordering //
+// -------------- //
+
+// Test that LEAQ/ADDQconst are folded into SETx ops
+
+var r bool
+
+func CmpFold(x uint32) {
+ // amd64:`SETHI\t.*\(SB\)`
+ r = x > 4
+}
+
+// Test that direct comparisons with memory are generated when
+// possible
+
+func CmpMem1(p int, q *int) bool {
+ // amd64:`CMPQ\t\(.*\), [A-Z]`
+ return p < *q
+}
+
+func CmpMem2(p *int, q int) bool {
+ // amd64:`CMPQ\t\(.*\), [A-Z]`
+ return *p < q
+}
+
+func CmpMem3(p *int) bool {
+ // amd64:`CMPQ\t\(.*\), [$]7`
+ return *p < 7
+}
+
+func CmpMem4(p *int) bool {
+ // amd64:`CMPQ\t\(.*\), [$]7`
+ return 7 < *p
+}
+
+func CmpMem5(p **int) {
+ // amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0`
+ *p = nil
+}
+
+func CmpMem6(a []int) int {
+ // 386:`CMPL\s8\([A-Z]+\),`
+ // amd64:`CMPQ\s16\([A-Z]+\),`
+ if a[1] > a[2] {
+ return 1
+ } else {
+ return 2
+ }
+}
+
+// Check tbz/tbnz are generated when comparing against zero on arm64
+
+func CmpZero1(a int32, ptr *int) {
+ if a < 0 { // arm64:"TBZ"
+ *ptr = 0
+ }
+}
+
+func CmpZero2(a int64, ptr *int) {
+ if a < 0 { // arm64:"TBZ"
+ *ptr = 0
+ }
+}
+
+func CmpZero3(a int32, ptr *int) {
+ if a >= 0 { // arm64:"TBNZ"
+ *ptr = 0
+ }
+}
+
+func CmpZero4(a int64, ptr *int) {
+ if a >= 0 { // arm64:"TBNZ"
+ *ptr = 0
+ }
+}
+
+func CmpToZero(a, b, d int32, e, f int64, deOptC0, deOptC1 bool) int32 {
+ // arm:`TST`,-`AND`
+ // arm64:`TSTW`,-`AND`
+ // 386:`TESTL`,-`ANDL`
+ // amd64:`TESTL`,-`ANDL`
+ c0 := a&b < 0
+ // arm:`CMN`,-`ADD`
+ // arm64:`CMNW`,-`ADD`
+ c1 := a+b < 0
+ // arm:`TEQ`,-`XOR`
+ c2 := a^b < 0
+ // arm64:`TST`,-`AND`
+ // amd64:`TESTQ`,-`ANDQ`
+ c3 := e&f < 0
+ // arm64:`CMN`,-`ADD`
+ c4 := e+f < 0
+ // not optimized to single CMNW/CMN due to further use of b+d
+ // arm64:`ADD`,-`CMNW`
+ // arm:`ADD`,-`CMN`
+ c5 := b+d == 0
+ // not optimized to single TSTW/TST due to further use of a&d
+ // arm64:`AND`,-`TSTW`
+ // arm:`AND`,-`TST`
+ // 386:`ANDL`
+ c6 := a&d >= 0
+ // arm64:`TST\sR[0-9]+<<3,\sR[0-9]+`
+ c7 := e&(f<<3) < 0
+ // arm64:`CMN\sR[0-9]+<<3,\sR[0-9]+`
+ c8 := e+(f<<3) < 0
+ // arm64:`TST\sR[0-9],\sR[0-9]+`
+ c9 := e&(-19) < 0
+ if c0 {
+ return 1
+ } else if c1 {
+ return 2
+ } else if c2 {
+ return 3
+ } else if c3 {
+ return 4
+ } else if c4 {
+ return 5
+ } else if c5 {
+ return 6
+ } else if c6 {
+ return 7
+ } else if c7 {
+ return 9
+ } else if c8 {
+ return 10
+ } else if c9 {
+ return 11
+ } else if deOptC0 {
+ return b + d
+ } else if deOptC1 {
+ return a & d
+ } else {
+ return 0
+ }
+}
+
+func CmpLogicalToZero(a, b, c uint32, d, e uint64) uint64 {
+
+ // ppc64x:"ANDCC",-"CMPW"
+ // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+ if a&63 == 0 {
+ return 1
+ }
+
+ // ppc64x:"ANDCC",-"CMP"
+ // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+ if d&255 == 0 {
+ return 1
+ }
+
+ // ppc64x:"ANDCC",-"CMP"
+ // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+ if d&e == 0 {
+ return 1
+ }
+ // ppc64x:"ORCC",-"CMP"
+ // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+ if d|e == 0 {
+ return 1
+ }
+
+ // ppc64x:"XORCC",-"CMP"
+ // wasm:"I64Eqz","I32Eqz",-"I64ExtendI32U",-"I32WrapI64"
+ if e^d == 0 {
+ return 1
+ }
+ return 0
+}
+
+// The following CmpToZero_ex* check that cmp|cmn with bmi|bpl are generated for
+// 'comparing to zero' expressions
+
+// var + const
+// 'x-const' might be canonicalized to 'x+(-const)', so we check both
+// CMN and CMP for subtraction expressions to make the pattern robust.
+func CmpToZero_ex1(a int64, e int32) int {
+ // arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+ if a+3 < 0 {
+ return 1
+ }
+
+ // arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+ if a+5 <= 0 {
+ return 1
+ }
+
+ // arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+ if a+13 >= 0 {
+ return 2
+ }
+
+ // arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
+ if a-7 < 0 {
+ return 3
+ }
+
+ // arm64:`SUB`,`TBZ`
+ if a-11 >= 0 {
+ return 4
+ }
+
+ // arm64:`SUB`,`CMP`,`BGT`
+ if a-19 > 0 {
+ return 4
+ }
+
+ // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
+ if e+3 < 0 {
+ return 5
+ }
+
+ // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
+ if e+13 >= 0 {
+ return 6
+ }
+
+ // arm64:`CMPW|CMNW`,`(BMI|BPL)`
+ // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
+ if e-7 < 0 {
+ return 7
+ }
+
+ // arm64:`SUB`,`TBNZ`
+ // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
+ if e-11 >= 0 {
+ return 8
+ }
+
+ return 0
+}
+
+// var + var
+// TODO: optimize 'var - var'
+func CmpToZero_ex2(a, b, c int64, e, f, g int32) int {
+ // arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+ if a+b < 0 {
+ return 1
+ }
+
+ // arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+ if a+c <= 0 {
+ return 1
+ }
+
+ // arm64:`CMN`,-`ADD`,`(BMI|BPL)`
+ if b+c >= 0 {
+ return 2
+ }
+
+ // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
+ if e+f < 0 {
+ return 5
+ }
+
+ // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
+ if f+g >= 0 {
+ return 6
+ }
+ return 0
+}
+
+// var + var*var
+func CmpToZero_ex3(a, b, c, d int64, e, f, g, h int32) int {
+ // arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+ if a+b*c < 0 {
+ return 1
+ }
+
+ // arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)`
+ if b+c*d >= 0 {
+ return 2
+ }
+
+ // arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+ // arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
+ if e+f*g > 0 {
+ return 5
+ }
+
+ // arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+ // arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
+ if f+g*h <= 0 {
+ return 6
+ }
+ return 0
+}
+
+// var - var*var
+func CmpToZero_ex4(a, b, c, d int64, e, f, g, h int32) int {
+ // arm64:`CMP`,-`MSUB`,`MUL`,`BEQ`,`(BMI|BPL)`
+ if a-b*c > 0 {
+ return 1
+ }
+
+ // arm64:`CMP`,-`MSUB`,`MUL`,`(BMI|BPL)`
+ if b-c*d >= 0 {
+ return 2
+ }
+
+ // arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+ if e-f*g < 0 {
+ return 5
+ }
+
+ // arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)`
+ if f-g*h >= 0 {
+ return 6
+ }
+ return 0
+}
+
+func CmpToZero_ex5(e, f int32, u uint32) int {
+ // arm:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+ if e+f<<1 > 0 {
+ return 1
+ }
+
+ // arm:`CMP`,-`SUB`,`(BMI|BPL)`
+ if f-int32(u>>2) >= 0 {
+ return 2
+ }
+ return 0
+}
+
+func UintLtZero(a uint8, b uint16, c uint32, d uint64) int {
+ // amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCC|JCS)`
+ // arm64: -`(CMPW|CMP|BHS|BLO)`
+ if a < 0 || b < 0 || c < 0 || d < 0 {
+ return 1
+ }
+ return 0
+}
+
+func UintGeqZero(a uint8, b uint16, c uint32, d uint64) int {
+ // amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCS|JCC)`
+ // arm64: -`(CMPW|CMP|BLO|BHS)`
+ if a >= 0 || b >= 0 || c >= 0 || d >= 0 {
+ return 1
+ }
+ return 0
+}
+
+func UintGtZero(a uint8, b uint16, c uint32, d uint64) int {
+ // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLS|BHI)`
+ if a > 0 || b > 0 || c > 0 || d > 0 {
+ return 1
+ }
+ return 0
+}
+
+func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int {
+ // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHI|BLS)`
+ if a <= 0 || b <= 0 || c <= 0 || d <= 0 {
+ return 1
+ }
+ return 0
+}
+
+func UintLtOne(a uint8, b uint16, c uint32, d uint64) int {
+ // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHS|BLO)`
+ if a < 1 || b < 1 || c < 1 || d < 1 {
+ return 1
+ }
+ return 0
+}
+
+func UintGeqOne(a uint8, b uint16, c uint32, d uint64) int {
+ // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLO|BHS)`
+ if a >= 1 || b >= 1 || c >= 1 || d >= 1 {
+ return 1
+ }
+ return 0
+}
+
+func CmpToZeroU_ex1(a uint8, b uint16, c uint32, d uint64) int {
+ // wasm:"I64Eqz"-"I64LtU"
+ if 0 < a {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LtU"
+ if 0 < b {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LtU"
+ if 0 < c {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LtU"
+ if 0 < d {
+ return 1
+ }
+ return 0
+}
+
+func CmpToZeroU_ex2(a uint8, b uint16, c uint32, d uint64) int {
+ // wasm:"I64Eqz"-"I64LeU"
+ if a <= 0 {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LeU"
+ if b <= 0 {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LeU"
+ if c <= 0 {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LeU"
+ if d <= 0 {
+ return 1
+ }
+ return 0
+}
+
+func CmpToOneU_ex1(a uint8, b uint16, c uint32, d uint64) int {
+ // wasm:"I64Eqz"-"I64LtU"
+ if a < 1 {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LtU"
+ if b < 1 {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LtU"
+ if c < 1 {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LtU"
+ if d < 1 {
+ return 1
+ }
+ return 0
+}
+
+func CmpToOneU_ex2(a uint8, b uint16, c uint32, d uint64) int {
+ // wasm:"I64Eqz"-"I64LeU"
+ if 1 <= a {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LeU"
+ if 1 <= b {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LeU"
+ if 1 <= c {
+ return 1
+ }
+ // wasm:"I64Eqz"-"I64LeU"
+ if 1 <= d {
+ return 1
+ }
+ return 0
+}
+
+// Check that small memequals are replaced with eq instructions
+
+func equalConstString1() bool {
+ a := string("A")
+ b := string("Z")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a == b
+}
+
+func equalVarString1(a string) bool {
+ b := string("Z")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a[:1] == b
+}
+
+func equalConstString2() bool {
+ a := string("AA")
+ b := string("ZZ")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a == b
+}
+
+func equalVarString2(a string) bool {
+ b := string("ZZ")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a[:2] == b
+}
+
+func equalConstString4() bool {
+ a := string("AAAA")
+ b := string("ZZZZ")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a == b
+}
+
+func equalVarString4(a string) bool {
+ b := string("ZZZZ")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a[:4] == b
+}
+
+func equalConstString8() bool {
+ a := string("AAAAAAAA")
+ b := string("ZZZZZZZZ")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a == b
+}
+
+func equalVarString8(a string) bool {
+ b := string("ZZZZZZZZ")
+ // amd64:-".*memequal"
+ // arm64:-".*memequal"
+ // ppc64x:-".*memequal"
+ return a[:8] == b
+}
+
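+// cmpToCmn checks that comparisons which can be expressed as a sum against
+// zero (a+b == 0, a == -c, a < -8, ...) are lowered to CMN, which sets the
+// flags on the sum of its operands, while the subtraction forms keep CMP.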
+func cmpToCmn(a, b, c, d int) int {
+ var c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 int
+ // arm64:`CMN`,-`CMP`
+ if a < -8 {
+ c1 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if a+1 == 0 {
+ c2 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if a+3 != 0 {
+ c3 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if a+b == 0 {
+ c4 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if b+c != 0 {
+ c5 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if a == -c {
+ c6 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if b != -d {
+ c7 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if a*b+c == 0 {
+ c8 = 1
+ }
+ // arm64:`CMN`,-`CMP`
+ if a*c+b != 0 {
+ c9 = 1
+ }
+ // arm64:`CMP`,-`CMN`
+ if b*c-a == 0 {
+ c10 = 1
+ }
+ // arm64:`CMP`,-`CMN`
+ if a*d-b != 0 {
+ c11 = 1
+ }
+ return c1 + c2 + c3 + c4 + c5 + c6 + c7 + c8 + c9 + c10 + c11
+}
+
+func cmpToCmnLessThan(a, b, c, d int) int {
+ var c1, c2, c3, c4 int
+ // arm64:`CMN`,`CSET\tMI`,-`CMP`
+ if a+1 < 0 {
+ c1 = 1
+ }
+ // arm64:`CMN`,`CSET\tMI`,-`CMP`
+ if a+b < 0 {
+ c2 = 1
+ }
+ // arm64:`CMN`,`CSET\tMI`,-`CMP`
+ if a*b+c < 0 {
+ c3 = 1
+ }
+ // arm64:`CMP`,`CSET\tMI`,-`CMN`
+ if a-b*c < 0 {
+ c4 = 1
+ }
+ return c1 + c2 + c3 + c4
+}
+
+func cmpToCmnGreaterThanEqual(a, b, c, d int) int {
+ var c1, c2, c3, c4 int
+ // arm64:`CMN`,`CSET\tPL`,-`CMP`
+ if a+1 >= 0 {
+ c1 = 1
+ }
+ // arm64:`CMN`,`CSET\tPL`,-`CMP`
+ if a+b >= 0 {
+ c2 = 1
+ }
+ // arm64:`CMN`,`CSET\tPL`,-`CMP`
+ if a*b+c >= 0 {
+ c3 = 1
+ }
+ // arm64:`CMP`,`CSET\tPL`,-`CMN`
+ if a-b*c >= 0 {
+ c4 = 1
+ }
+ return c1 + c2 + c3 + c4
+}
+
+func cmp1(val string) bool {
+ var z string
+ // amd64:-".*memequal"
+ return z == val
+}
+
+func cmp2(val string) bool {
+ var z string
+ // amd64:-".*memequal"
+ return val == z
+}
+
+func cmp3(val string) bool {
+ z := "food"
+ // amd64:-".*memequal"
+ return z == val
+}
+
+func cmp4(val string) bool {
+ z := "food"
+ // amd64:-".*memequal"
+ return val == z
+}
+
+func cmp5[T comparable](val T) bool {
+ var z T
+ // amd64:-".*memequal"
+ return z == val
+}
+
+func cmp6[T comparable](val T) bool {
+ var z T
+ // amd64:-".*memequal"
+ return val == z
+}
+
+func cmp7() {
+ cmp5[string]("") // force instantiation
+ cmp6[string]("") // force instantiation
+}
+
+type Point struct {
+ X, Y int
+}
+
+// invertLessThanNoov checks that (LessThanNoov (InvertFlags x)) is lowered as
+// a CMP, CSET, CSEL instruction sequence. InvertFlags are only generated under
+// certain conditions, see canonLessThan, so if the code below does not
+// generate an InvertFlags OP, this check may fail.
+func invertLessThanNoov(p1, p2, p3 Point) bool {
+ // arm64:`CMP`,`CSET`,`CSEL`
+ return (p1.X-p3.X)*(p2.Y-p3.Y)-(p2.X-p3.X)*(p1.Y-p3.Y) < 0
+}
diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go
new file mode 100644
index 0000000..1058910
--- /dev/null
+++ b/test/codegen/condmove.go
@@ -0,0 +1,453 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func cmovint(c int) int {
+ x := c + 4
+ if x < 0 {
+ x = 182
+ }
+ // amd64:"CMOVQLT"
+ // arm64:"CSEL\tLT"
+ // ppc64x:"ISEL\t[$]0"
+ // wasm:"Select"
+ return x
+}
+
+func cmovchan(x, y chan int) chan int {
+ if x != y {
+ x = y
+ }
+ // amd64:"CMOVQNE"
+ // arm64:"CSEL\tNE"
+ // ppc64x:"ISEL\t[$]2"
+ // wasm:"Select"
+ return x
+}
+
+func cmovuintptr(x, y uintptr) uintptr {
+ if x < y {
+ x = -y
+ }
+ // amd64:"CMOVQ(HI|CS)"
+ // arm64:"CSNEG\tLS"
+ // ppc64x:"ISEL\t[$]1"
+ // wasm:"Select"
+ return x
+}
+
+func cmov32bit(x, y uint32) uint32 {
+ if x < y {
+ x = -y
+ }
+ // amd64:"CMOVL(HI|CS)"
+ // arm64:"CSNEG\t(LS|HS)"
+ // ppc64x:"ISEL\t[$]1"
+ // wasm:"Select"
+ return x
+}
+
+func cmov16bit(x, y uint16) uint16 {
+ if x < y {
+ x = -y
+ }
+ // amd64:"CMOVW(HI|CS)"
+ // arm64:"CSNEG\t(LS|HS)"
+ // ppc64x:"ISEL\t[$][01]"
+ // wasm:"Select"
+ return x
+}
+
+// Floating point comparison. For EQ/NE, we must
+// generate special code to handle NaNs.
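+// On amd64 an unordered (NaN) comparison is reported through the parity flag,
+// so the equality tests below expect an extra parity-based conditional move
+// (CMOVQPC/CMOVQPS) alongside CMOVQNE.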
+func cmovfloateq(x, y float64) int {
+ a := 128
+ if x == y {
+ a = 256
+ }
+ // amd64:"CMOVQNE","CMOVQPC"
+ // arm64:"CSEL\tEQ"
+ // ppc64x:"ISEL\t[$]2"
+ // wasm:"Select"
+ return a
+}
+
+func cmovfloatne(x, y float64) int {
+ a := 128
+ if x != y {
+ a = 256
+ }
+ // amd64:"CMOVQNE","CMOVQPS"
+ // arm64:"CSEL\tNE"
+ // ppc64x:"ISEL\t[$]2"
+ // wasm:"Select"
+ return a
+}
+
+//go:noinline
+func frexp(f float64) (frac float64, exp int) {
+ return 1.0, 4
+}
+
+//go:noinline
+func ldexp(frac float64, exp int) float64 {
+ return 1.0
+}
+
+// Generate a CMOV with a floating comparison and integer move.
+func cmovfloatint2(x, y float64) float64 {
+ yfr, yexp := 4.0, 5
+
+ r := x
+ for r >= y {
+ rfr, rexp := frexp(r)
+ if rfr < yfr {
+ rexp = rexp - 1
+ }
+ // amd64:"CMOVQHI"
+ // arm64:"CSEL\tMI"
+ // ppc64x:"ISEL\t[$]0"
+ // wasm:"Select"
+ r = r - ldexp(y, rexp-yexp)
+ }
+ return r
+}
+
+func cmovloaded(x [4]int, y int) int {
+ if x[2] != 0 {
+ y = x[2]
+ } else {
+ y = y >> 2
+ }
+ // amd64:"CMOVQNE"
+ // arm64:"CSEL\tNE"
+ // ppc64x:"ISEL\t[$]2"
+ // wasm:"Select"
+ return y
+}
+
+func cmovuintptr2(x, y uintptr) uintptr {
+ a := x * 2
+ if a == 0 {
+ a = 256
+ }
+ // amd64:"CMOVQEQ"
+ // arm64:"CSEL\tEQ"
+ // ppc64x:"ISEL\t[$]2"
+ // wasm:"Select"
+ return a
+}
+
+// Floating point CMOVs are not supported by amd64/arm64/ppc64x
+func cmovfloatmove(x, y int) float64 {
+ a := 1.0
+ if x <= y {
+ a = 2.0
+ }
+ // amd64:-"CMOV"
+ // arm64:-"CSEL"
+ // ppc64x:-"ISEL"
+ // wasm:-"Select"
+ return a
+}
+
+// On amd64, the following patterns trigger comparison inversion.
+// Test that we correctly invert the CMOV condition
+var gsink int64
+var gusink uint64
+
+func cmovinvert1(x, y int64) int64 {
+ if x < gsink {
+ y = -y
+ }
+ // amd64:"CMOVQGT"
+ return y
+}
+func cmovinvert2(x, y int64) int64 {
+ if x <= gsink {
+ y = -y
+ }
+ // amd64:"CMOVQGE"
+ return y
+}
+func cmovinvert3(x, y int64) int64 {
+ if x == gsink {
+ y = -y
+ }
+ // amd64:"CMOVQEQ"
+ return y
+}
+func cmovinvert4(x, y int64) int64 {
+ if x != gsink {
+ y = -y
+ }
+ // amd64:"CMOVQNE"
+ return y
+}
+func cmovinvert5(x, y uint64) uint64 {
+ if x > gusink {
+ y = -y
+ }
+ // amd64:"CMOVQCS"
+ return y
+}
+func cmovinvert6(x, y uint64) uint64 {
+ if x >= gusink {
+ y = -y
+ }
+ // amd64:"CMOVQLS"
+ return y
+}
+
+func cmovload(a []int, i int, b bool) int {
+ if b {
+ i++
+ }
+ // See issue 26306
+ // amd64:-"CMOVQNE"
+ return a[i]
+}
+
+func cmovstore(a []int, i int, b bool) {
+ if b {
+ i++
+ }
+ // amd64:"CMOVQNE"
+ a[i] = 7
+}
+
+var r0, r1, r2, r3, r4, r5 int
+
+func cmovinc(cond bool, a, b, c int) {
+ var x0, x1 int
+
+ if cond {
+ x0 = a
+ } else {
+ x0 = b + 1
+ }
+ // arm64:"CSINC\tNE", -"CSEL"
+ r0 = x0
+
+ if cond {
+ x1 = b + 1
+ } else {
+ x1 = a
+ }
+ // arm64:"CSINC\tEQ", -"CSEL"
+ r1 = x1
+
+ if cond {
+ c++
+ }
+ // arm64:"CSINC\tEQ", -"CSEL"
+ r2 = c
+}
+
+func cmovinv(cond bool, a, b int) {
+ var x0, x1 int
+
+ if cond {
+ x0 = a
+ } else {
+ x0 = ^b
+ }
+ // arm64:"CSINV\tNE", -"CSEL"
+ r0 = x0
+
+ if cond {
+ x1 = ^b
+ } else {
+ x1 = a
+ }
+ // arm64:"CSINV\tEQ", -"CSEL"
+ r1 = x1
+}
+
+func cmovneg(cond bool, a, b, c int) {
+ var x0, x1 int
+
+ if cond {
+ x0 = a
+ } else {
+ x0 = -b
+ }
+ // arm64:"CSNEG\tNE", -"CSEL"
+ r0 = x0
+
+ if cond {
+ x1 = -b
+ } else {
+ x1 = a
+ }
+ // arm64:"CSNEG\tEQ", -"CSEL"
+ r1 = x1
+}
+
+func cmovsetm(cond bool, x int) {
+ var x0, x1 int
+
+ if cond {
+ x0 = -1
+ } else {
+ x0 = 0
+ }
+ // arm64:"CSETM\tNE", -"CSEL"
+ r0 = x0
+
+ if cond {
+ x1 = 0
+ } else {
+ x1 = -1
+ }
+ // arm64:"CSETM\tEQ", -"CSEL"
+ r1 = x1
+}
+
+func cmovFcmp0(s, t float64, a, b int) {
+ var x0, x1, x2, x3, x4, x5 int
+
+ if s < t {
+ x0 = a
+ } else {
+ x0 = b + 1
+ }
+ // arm64:"CSINC\tMI", -"CSEL"
+ r0 = x0
+
+ if s <= t {
+ x1 = a
+ } else {
+ x1 = ^b
+ }
+ // arm64:"CSINV\tLS", -"CSEL"
+ r1 = x1
+
+ if s > t {
+ x2 = a
+ } else {
+ x2 = -b
+ }
+ // arm64:"CSNEG\tMI", -"CSEL"
+ r2 = x2
+
+ if s >= t {
+ x3 = -1
+ } else {
+ x3 = 0
+ }
+ // arm64:"CSETM\tLS", -"CSEL"
+ r3 = x3
+
+ if s == t {
+ x4 = a
+ } else {
+ x4 = b + 1
+ }
+ // arm64:"CSINC\tEQ", -"CSEL"
+ r4 = x4
+
+ if s != t {
+ x5 = a
+ } else {
+ x5 = b + 1
+ }
+ // arm64:"CSINC\tNE", -"CSEL"
+ r5 = x5
+}
+
+func cmovFcmp1(s, t float64, a, b int) {
+ var x0, x1, x2, x3, x4, x5 int
+
+ if s < t {
+ x0 = b + 1
+ } else {
+ x0 = a
+ }
+ // arm64:"CSINC\tPL", -"CSEL"
+ r0 = x0
+
+ if s <= t {
+ x1 = ^b
+ } else {
+ x1 = a
+ }
+ // arm64:"CSINV\tHI", -"CSEL"
+ r1 = x1
+
+ if s > t {
+ x2 = -b
+ } else {
+ x2 = a
+ }
+ // arm64:"CSNEG\tPL", -"CSEL"
+ r2 = x2
+
+ if s >= t {
+ x3 = 0
+ } else {
+ x3 = -1
+ }
+ // arm64:"CSETM\tHI", -"CSEL"
+ r3 = x3
+
+ if s == t {
+ x4 = b + 1
+ } else {
+ x4 = a
+ }
+ // arm64:"CSINC\tNE", -"CSEL"
+ r4 = x4
+
+ if s != t {
+ x5 = b + 1
+ } else {
+ x5 = a
+ }
+ // arm64:"CSINC\tEQ", -"CSEL"
+ r5 = x5
+}
+
+func cmovzero1(c bool) int {
+ var x int
+ if c {
+ x = 182
+ }
+ // loong64:"MASKEQZ", -"MASKNEZ"
+ return x
+}
+
+func cmovzero2(c bool) int {
+ var x int
+ if !c {
+ x = 182
+ }
+ // loong64:"MASKNEZ", -"MASKEQZ"
+ return x
+}
+
+// Conditionally selecting between a value and 0 can be done without
+// an extra load of 0 to a register on PPC64 by using R0 (which always
+// holds the value $0) instead. Verify both cases where either arg1
+// or arg2 is zero.
+func cmovzeroreg0(a, b int) int {
+ x := 0
+ if a == b {
+ x = a
+ }
+ // ppc64x:"ISEL\t[$]2, R[0-9]+, R0, R[0-9]+"
+ return x
+}
+
+func cmovzeroreg1(a, b int) int {
+ x := a
+ if a == b {
+ x = 0
+ }
+ // ppc64x:"ISEL\t[$]2, R0, R[0-9]+, R[0-9]+"
+ return x
+}
diff --git a/test/codegen/constants.go b/test/codegen/constants.go
new file mode 100644
index 0000000..3ce17d0
--- /dev/null
+++ b/test/codegen/constants.go
@@ -0,0 +1,33 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// A uint16 or int16 constant shifted left.
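+// Such constants are materialized with a 16-bit MOVD immediate followed by a
+// single shift (SLD), avoiding a longer multi-instruction constant load.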
+func shifted16BitConstants(out [64]uint64) {
+ // ppc64x: "MOVD\t[$]8193,", "SLD\t[$]27,"
+ out[0] = 0x0000010008000000
+ // ppc64x: "MOVD\t[$]-32767", "SLD\t[$]26,"
+ out[1] = 0xFFFFFE0004000000
+ // ppc64x: "MOVD\t[$]-1", "SLD\t[$]48,"
+ out[2] = 0xFFFF000000000000
+ // ppc64x: "MOVD\t[$]65535", "SLD\t[$]44,"
+ out[3] = 0x0FFFF00000000000
+}
+
+// A contiguous set of 1 bits, potentially wrapping.
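+// These masks are materialized by loading -1 and applying a single
+// rotate-with-mask instruction (the RLDC in the patterns below); power10 can
+// load some of them directly with a prefixed immediate.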
+func contiguousMaskConstants(out [64]uint64) {
+ // ppc64x: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]44, [$]63,"
+ out[0] = 0xFFFFF00000000001
+ // ppc64x: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]43, [$]63,"
+ out[1] = 0xFFFFF80000000001
+ // ppc64x: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]43, [$]4,"
+ out[2] = 0x0FFFF80000000000
+ // ppc64x/power8: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]33, [$]63,"
+ // ppc64x/power9: "MOVD\t[$]-1", "RLDC\tR[0-9]+, [$]33, [$]63,"
+ // ppc64x/power10: "MOVD\t[$]-8589934591,"
+ out[3] = 0xFFFFFFFE00000001
+}
diff --git a/test/codegen/copy.go b/test/codegen/copy.go
new file mode 100644
index 0000000..17ee8bc
--- /dev/null
+++ b/test/codegen/copy.go
@@ -0,0 +1,159 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "runtime"
+
+// Check small copies are replaced with moves.
+
+func movesmall4() {
+ x := [...]byte{1, 2, 3, 4}
+ // 386:-".*memmove"
+ // amd64:-".*memmove"
+ // arm:-".*memmove"
+ // arm64:-".*memmove"
+ // ppc64x:-".*memmove"
+ copy(x[1:], x[:])
+}
+
+func movesmall7() {
+ x := [...]byte{1, 2, 3, 4, 5, 6, 7}
+ // 386:-".*memmove"
+ // amd64:-".*memmove"
+ // arm64:-".*memmove"
+ // ppc64x:-".*memmove"
+ copy(x[1:], x[:])
+}
+
+func movesmall16() {
+ x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
+ // amd64:-".*memmove"
+ // ppc64x:".*memmove"
+ copy(x[1:], x[:])
+}
+
+var x [256]byte
+
+// Check that large disjoint copies are replaced with moves.
+
+func moveDisjointStack32() {
+ var s [32]byte
+ // ppc64x:-".*memmove"
+ // ppc64x/power8:"LXVD2X",-"ADD",-"BC"
+ // ppc64x/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+ copy(s[:], x[:32])
+ runtime.KeepAlive(&s)
+}
+
+func moveDisjointStack64() {
+ var s [96]byte
+ // ppc64x:-".*memmove"
+ // ppc64x/power8:"LXVD2X","ADD","BC"
+ // ppc64x/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+ copy(s[:], x[:96])
+ runtime.KeepAlive(&s)
+}
+
+func moveDisjointStack() {
+ var s [256]byte
+ // s390x:-".*memmove"
+ // amd64:-".*memmove"
+ // ppc64x:-".*memmove"
+ // ppc64x/power8:"LXVD2X"
+ // ppc64x/power9:"LXV",-"LXVD2X"
+ copy(s[:], x[:])
+ runtime.KeepAlive(&s)
+}
+
+func moveDisjointArg(b *[256]byte) {
+ var s [256]byte
+ // s390x:-".*memmove"
+ // amd64:-".*memmove"
+ // ppc64x:-".*memmove"
+ // ppc64x/power8:"LXVD2X"
+ // ppc64x/power9:"LXV",-"LXVD2X"
+ copy(s[:], b[:])
+ runtime.KeepAlive(&s)
+}
+
+func moveDisjointNoOverlap(a *[256]byte) {
+ // s390x:-".*memmove"
+ // amd64:-".*memmove"
+ // ppc64x:-".*memmove"
+ // ppc64x/power8:"LXVD2X"
+ // ppc64x/power9:"LXV",-"LXVD2X"
+ copy(a[:], a[128:])
+}
+
+// Check arch-specific memmove lowering. See issue 41662 for details.
+
+func moveArchLowering1(b []byte, x *[1]byte) {
+ _ = b[1]
+ // amd64:-".*memmove"
+ // arm64:-".*memmove"
+ // ppc64x:-".*memmove"
+ copy(b, x[:])
+}
+
+func moveArchLowering2(b []byte, x *[2]byte) {
+ _ = b[2]
+ // amd64:-".*memmove"
+ // arm64:-".*memmove"
+ // ppc64x:-".*memmove"
+ copy(b, x[:])
+}
+
+func moveArchLowering4(b []byte, x *[4]byte) {
+ _ = b[4]
+ // amd64:-".*memmove"
+ // arm64:-".*memmove"
+ // ppc64x:-".*memmove"
+ copy(b, x[:])
+}
+
+func moveArchLowering8(b []byte, x *[8]byte) {
+ _ = b[8]
+ // amd64:-".*memmove"
+ // arm64:-".*memmove"
+ // ppc64x:-".*memmove"
+ copy(b, x[:])
+}
+
+func moveArchLowering16(b []byte, x *[16]byte) {
+ _ = b[16]
+ // amd64:-".*memmove"
+ copy(b, x[:])
+}
+
+// Check that no branches are generated when the pointers are [not] equal.
+
+func ptrEqual() {
+ // amd64:-"JEQ",-"JNE"
+ // ppc64x:-"BEQ",-"BNE"
+ // s390x:-"BEQ",-"BNE"
+ copy(x[:], x[:])
+}
+
+func ptrOneOffset() {
+ // amd64:-"JEQ",-"JNE"
+ // ppc64x:-"BEQ",-"BNE"
+ // s390x:-"BEQ",-"BNE"
+ copy(x[1:], x[:])
+}
+
+func ptrBothOffset() {
+ // amd64:-"JEQ",-"JNE"
+ // ppc64x:-"BEQ",-"BNE"
+ // s390x:-"BEQ",-"BNE"
+ copy(x[1:], x[2:])
+}
+
+// Verify #62698 on PPC64.
+func noMaskOnCopy(a []int, s string, x int) int {
+ // ppc64x:-"MOVD\t$-1", -"AND"
+ return a[x&^copy([]byte{}, s)]
+}
diff --git a/test/codegen/floats.go b/test/codegen/floats.go
new file mode 100644
index 0000000..7991174
--- /dev/null
+++ b/test/codegen/floats.go
@@ -0,0 +1,158 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains codegen tests related to arithmetic
+// simplifications and optimizations on float types.
+// For codegen tests on integer types, see arithmetic.go.
+
+// --------------------- //
+// Strength-reduce //
+// --------------------- //
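+
+// Multiplying by 2 is rewritten as an addition, and dividing by a power of two
+// as a multiplication by its exactly representable reciprocal (division by 0.5
+// likewise becomes an addition), so the checks below expect the ADD/MUL forms
+// and reject the MUL/DIV ones.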
+
+func Mul2(f float64) float64 {
+ // 386/sse2:"ADDSD",-"MULSD"
+ // amd64:"ADDSD",-"MULSD"
+ // arm/7:"ADDD",-"MULD"
+ // arm64:"FADDD",-"FMULD"
+ // ppc64x:"FADD",-"FMUL"
+ // riscv64:"FADDD",-"FMULD"
+ return f * 2.0
+}
+
+func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
+ // 386/sse2:"MULSD",-"DIVSD"
+ // amd64:"MULSD",-"DIVSD"
+ // arm/7:"MULD",-"DIVD"
+ // arm64:"FMULD",-"FDIVD"
+ // ppc64x:"FMUL",-"FDIV"
+ // riscv64:"FMULD",-"FDIVD"
+ x := f1 / 16.0
+
+ // 386/sse2:"MULSD",-"DIVSD"
+ // amd64:"MULSD",-"DIVSD"
+ // arm/7:"MULD",-"DIVD"
+ // arm64:"FMULD",-"FDIVD"
+ // ppc64x:"FMUL",-"FDIVD"
+ // riscv64:"FMULD",-"FDIVD"
+ y := f2 / 0.125
+
+ // 386/sse2:"ADDSD",-"DIVSD",-"MULSD"
+ // amd64:"ADDSD",-"DIVSD",-"MULSD"
+ // arm/7:"ADDD",-"MULD",-"DIVD"
+ // arm64:"FADDD",-"FMULD",-"FDIVD"
+ // ppc64x:"FADD",-"FMUL",-"FDIV"
+ // riscv64:"FADDD",-"FMULD",-"FDIVD"
+ z := f3 / 0.5
+
+ return x, y, z
+}
+
+func indexLoad(b0 []float32, b1 float32, idx int) float32 {
+ // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
+ return b0[idx] * b1
+}
+
+func indexStore(b0 []float64, b1 float64, idx int) {
+ // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
+ b0[idx] = b1
+}
+
+// ----------- //
+// Fused //
+// ----------- //
+
+func FusedAdd32(x, y, z float32) float32 {
+ // s390x:"FMADDS\t"
+ // ppc64x:"FMADDS\t"
+ // arm64:"FMADDS"
+ // riscv64:"FMADDS\t"
+ return x*y + z
+}
+
+func FusedSub32_a(x, y, z float32) float32 {
+ // s390x:"FMSUBS\t"
+ // ppc64x:"FMSUBS\t"
+ // riscv64:"FMSUBS\t"
+ return x*y - z
+}
+
+func FusedSub32_b(x, y, z float32) float32 {
+ // arm64:"FMSUBS"
+ // riscv64:"FNMSUBS\t"
+ return z - x*y
+}
+
+func FusedAdd64(x, y, z float64) float64 {
+ // s390x:"FMADD\t"
+ // ppc64x:"FMADD\t"
+ // arm64:"FMADDD"
+ // riscv64:"FMADDD\t"
+ return x*y + z
+}
+
+func FusedSub64_a(x, y, z float64) float64 {
+ // s390x:"FMSUB\t"
+ // ppc64x:"FMSUB\t"
+ // riscv64:"FMSUBD\t"
+ return x*y - z
+}
+
+func FusedSub64_b(x, y, z float64) float64 {
+ // arm64:"FMSUBD"
+ // riscv64:"FNMSUBD\t"
+ return z - x*y
+}
+
+func Cmp(f float64) bool {
+ // arm64:"FCMPD","(BGT|BLE|BMI|BPL)",-"CSET\tGT",-"CBZ"
+ return f > 4 || f < -4
+}
+
+func CmpZero64(f float64) bool {
+ // s390x:"LTDBR",-"FCMPU"
+ return f <= 0
+}
+
+func CmpZero32(f float32) bool {
+ // s390x:"LTEBR",-"CEBR"
+ return f <= 0
+}
+
+func CmpWithSub(a float64, b float64) bool {
+ f := a - b
+ // s390x:-"LTDBR"
+ return f <= 0
+}
+
+func CmpWithAdd(a float64, b float64) bool {
+ f := a + b
+ // s390x:-"LTDBR"
+ return f <= 0
+}
+
+// ---------------- //
+// Non-floats //
+// ---------------- //
+
+// We should make sure that the compiler doesn't generate floating point
+// instructions for non-float operations on Plan 9, because floating point
+// operations are not allowed in the note handler.
+
+func ArrayZero() [16]byte {
+ // amd64:"MOVUPS"
+ // plan9/amd64/:-"MOVUPS"
+ var a [16]byte
+ return a
+}
+
+func ArrayCopy(a [16]byte) (b [16]byte) {
+ // amd64:"MOVUPS"
+ // plan9/amd64/:-"MOVUPS"
+ b = a
+ return
+}
diff --git a/test/codegen/fuse.go b/test/codegen/fuse.go
new file mode 100644
index 0000000..79dd337
--- /dev/null
+++ b/test/codegen/fuse.go
@@ -0,0 +1,197 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Notes:
+// - these examples use channels to provide a source of
+// unknown values that cannot be optimized away
+// - these examples use for loops to force branches
+// backward (predicted taken)
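+//
+// The conjunction tests rely on range fusion: a signed test such as
+// x >= 0 && x < 256 collapses into a single unsigned comparison against the
+// upper bound, and ranges that do not start at zero are first shifted down
+// with an ADD so the same single unsigned compare applies.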
+
+// ---------------------------------- //
+// signed integer range (conjunction) //
+// ---------------------------------- //
+
+func si1c(c <-chan int64) {
+ // amd64:"CMPQ\t.+, [$]256"
+ // s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255"
+ for x := <-c; x >= 0 && x < 256; x = <-c {
+ }
+}
+
+func si2c(c <-chan int32) {
+ // amd64:"CMPL\t.+, [$]256"
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
+ for x := <-c; x >= 0 && x < 256; x = <-c {
+ }
+}
+
+func si3c(c <-chan int16) {
+ // amd64:"CMPW\t.+, [$]256"
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255"
+ for x := <-c; x >= 0 && x < 256; x = <-c {
+ }
+}
+
+func si4c(c <-chan int8) {
+ // amd64:"CMPB\t.+, [$]10"
+ // s390x:"CLIJ\t[$]4, R[0-9]+, [$]10"
+ for x := <-c; x >= 0 && x < 10; x = <-c {
+ }
+}
+
+func si5c(c <-chan int64) {
+ // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+ // s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
+ for x := <-c; x < 256 && x > 4; x = <-c {
+ }
+}
+
+func si6c(c <-chan int32) {
+ // amd64:"CMPL\t.+, [$]255","DECL\t"
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
+ for x := <-c; x > 0 && x <= 256; x = <-c {
+ }
+}
+
+func si7c(c <-chan int16) {
+ // amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]60","ADDW\t[$]10,"
+ for x := <-c; x >= -10 && x <= 50; x = <-c {
+ }
+}
+
+func si8c(c <-chan int8) {
+ // amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
+ // s390x:"CLIJ\t[$]4, R[0-9]+, [$]126","ADDW\t[$]126,"
+ for x := <-c; x >= -126 && x < 0; x = <-c {
+ }
+}
+
+// ---------------------------------- //
+// signed integer range (disjunction) //
+// ---------------------------------- //
+
+func si1d(c <-chan int64) {
+ // amd64:"CMPQ\t.+, [$]256"
+ // s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255"
+ for x := <-c; x < 0 || x >= 256; x = <-c {
+ }
+}
+
+func si2d(c <-chan int32) {
+ // amd64:"CMPL\t.+, [$]256"
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
+ for x := <-c; x < 0 || x >= 256; x = <-c {
+ }
+}
+
+func si3d(c <-chan int16) {
+ // amd64:"CMPW\t.+, [$]256"
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255"
+ for x := <-c; x < 0 || x >= 256; x = <-c {
+ }
+}
+
+func si4d(c <-chan int8) {
+ // amd64:"CMPB\t.+, [$]10"
+ // s390x:"CLIJ\t[$]10, R[0-9]+, [$]10"
+ for x := <-c; x < 0 || x >= 10; x = <-c {
+ }
+}
+
+func si5d(c <-chan int64) {
+ // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+ // s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
+ for x := <-c; x >= 256 || x <= 4; x = <-c {
+ }
+}
+
+func si6d(c <-chan int32) {
+ // amd64:"CMPL\t.+, [$]255","DECL\t"
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255","ADDW\t[$]-1,"
+ for x := <-c; x <= 0 || x > 256; x = <-c {
+ }
+}
+
+func si7d(c <-chan int16) {
+ // amd64:"CMPW\t.+, [$]60","ADDL\t[$]10,"
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]60","ADDW\t[$]10,"
+ for x := <-c; x < -10 || x > 50; x = <-c {
+ }
+}
+
+func si8d(c <-chan int8) {
+ // amd64:"CMPB\t.+, [$]126","ADDL\t[$]126,"
+ // s390x:"CLIJ\t[$]10, R[0-9]+, [$]126","ADDW\t[$]126,"
+ for x := <-c; x < -126 || x >= 0; x = <-c {
+ }
+}
+
+// ------------------------------------ //
+// unsigned integer range (conjunction) //
+// ------------------------------------ //
+
+func ui1c(c <-chan uint64) {
+ // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+ // s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5,"
+ for x := <-c; x < 256 && x > 4; x = <-c {
+ }
+}
+
+func ui2c(c <-chan uint32) {
+ // amd64:"CMPL\t.+, [$]255","DECL\t"
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1,"
+ for x := <-c; x > 0 && x <= 256; x = <-c {
+ }
+}
+
+func ui3c(c <-chan uint16) {
+ // amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
+ // s390x:"CLIJ\t[$]12, R[0-9]+, [$]40","ADDW\t[$]-10,"
+ for x := <-c; x >= 10 && x <= 50; x = <-c {
+ }
+}
+
+func ui4c(c <-chan uint8) {
+ // amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
+ // s390x:"CLIJ\t[$]4, R[0-9]+, [$]2","ADDW\t[$]-126,"
+ for x := <-c; x >= 126 && x < 128; x = <-c {
+ }
+}
+
+// ------------------------------------ //
+// unsigned integer range (disjunction) //
+// ------------------------------------ //
+
+func ui1d(c <-chan uint64) {
+ // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5,"
+ // s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5,"
+ for x := <-c; x >= 256 || x <= 4; x = <-c {
+ }
+}
+
+func ui2d(c <-chan uint32) {
+ // amd64:"CMPL\t.+, [$]254","ADDL\t[$]-2,"
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]254","ADDW\t[$]-2,"
+ for x := <-c; x <= 1 || x > 256; x = <-c {
+ }
+}
+
+func ui3d(c <-chan uint16) {
+ // amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10,"
+ // s390x:"CLIJ\t[$]2, R[0-9]+, [$]40","ADDW\t[$]-10,"
+ for x := <-c; x < 10 || x > 50; x = <-c {
+ }
+}
+
+func ui4d(c <-chan uint8) {
+ // amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126,"
+ // s390x:"CLIJ\t[$]10, R[0-9]+, [$]2","ADDW\t[$]-126,"
+ for x := <-c; x < 126 || x >= 128; x = <-c {
+ }
+}
diff --git a/test/codegen/ifaces.go b/test/codegen/ifaces.go
new file mode 100644
index 0000000..2be3fa5
--- /dev/null
+++ b/test/codegen/ifaces.go
@@ -0,0 +1,27 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type I interface{ M() }
+
+func NopConvertIface(x I) I {
+ // amd64:-`.*runtime.convI2I`
+ return I(x)
+}
+
+func NopConvertGeneric[T any](x T) T {
+ // amd64:-`.*runtime.convI2I`
+ return T(x)
+}
+
+var NopConvertGenericIface = NopConvertGeneric[I]
+
+func ConvToM(x any) I {
+ // amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(.*\)`,`MOVQ\t8\(.*\)(.*\*1)`
+ // arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU`,`MOVD\t\(R.*\)\(R.*\)`
+ return x.(I)
+}
diff --git a/test/codegen/issue22703.go b/test/codegen/issue22703.go
new file mode 100644
index 0000000..0201de6
--- /dev/null
+++ b/test/codegen/issue22703.go
@@ -0,0 +1,535 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type I interface {
+ foo000()
+ foo001()
+ foo002()
+ foo003()
+ foo004()
+ foo005()
+ foo006()
+ foo007()
+ foo008()
+ foo009()
+ foo010()
+ foo011()
+ foo012()
+ foo013()
+ foo014()
+ foo015()
+ foo016()
+ foo017()
+ foo018()
+ foo019()
+ foo020()
+ foo021()
+ foo022()
+ foo023()
+ foo024()
+ foo025()
+ foo026()
+ foo027()
+ foo028()
+ foo029()
+ foo030()
+ foo031()
+ foo032()
+ foo033()
+ foo034()
+ foo035()
+ foo036()
+ foo037()
+ foo038()
+ foo039()
+ foo040()
+ foo041()
+ foo042()
+ foo043()
+ foo044()
+ foo045()
+ foo046()
+ foo047()
+ foo048()
+ foo049()
+ foo050()
+ foo051()
+ foo052()
+ foo053()
+ foo054()
+ foo055()
+ foo056()
+ foo057()
+ foo058()
+ foo059()
+ foo060()
+ foo061()
+ foo062()
+ foo063()
+ foo064()
+ foo065()
+ foo066()
+ foo067()
+ foo068()
+ foo069()
+ foo070()
+ foo071()
+ foo072()
+ foo073()
+ foo074()
+ foo075()
+ foo076()
+ foo077()
+ foo078()
+ foo079()
+ foo080()
+ foo081()
+ foo082()
+ foo083()
+ foo084()
+ foo085()
+ foo086()
+ foo087()
+ foo088()
+ foo089()
+ foo090()
+ foo091()
+ foo092()
+ foo093()
+ foo094()
+ foo095()
+ foo096()
+ foo097()
+ foo098()
+ foo099()
+ foo100()
+ foo101()
+ foo102()
+ foo103()
+ foo104()
+ foo105()
+ foo106()
+ foo107()
+ foo108()
+ foo109()
+ foo110()
+ foo111()
+ foo112()
+ foo113()
+ foo114()
+ foo115()
+ foo116()
+ foo117()
+ foo118()
+ foo119()
+ foo120()
+ foo121()
+ foo122()
+ foo123()
+ foo124()
+ foo125()
+ foo126()
+ foo127()
+ foo128()
+ foo129()
+ foo130()
+ foo131()
+ foo132()
+ foo133()
+ foo134()
+ foo135()
+ foo136()
+ foo137()
+ foo138()
+ foo139()
+ foo140()
+ foo141()
+ foo142()
+ foo143()
+ foo144()
+ foo145()
+ foo146()
+ foo147()
+ foo148()
+ foo149()
+ foo150()
+ foo151()
+ foo152()
+ foo153()
+ foo154()
+ foo155()
+ foo156()
+ foo157()
+ foo158()
+ foo159()
+ foo160()
+ foo161()
+ foo162()
+ foo163()
+ foo164()
+ foo165()
+ foo166()
+ foo167()
+ foo168()
+ foo169()
+ foo170()
+ foo171()
+ foo172()
+ foo173()
+ foo174()
+ foo175()
+ foo176()
+ foo177()
+ foo178()
+ foo179()
+ foo180()
+ foo181()
+ foo182()
+ foo183()
+ foo184()
+ foo185()
+ foo186()
+ foo187()
+ foo188()
+ foo189()
+ foo190()
+ foo191()
+ foo192()
+ foo193()
+ foo194()
+ foo195()
+ foo196()
+ foo197()
+ foo198()
+ foo199()
+ foo200()
+ foo201()
+ foo202()
+ foo203()
+ foo204()
+ foo205()
+ foo206()
+ foo207()
+ foo208()
+ foo209()
+ foo210()
+ foo211()
+ foo212()
+ foo213()
+ foo214()
+ foo215()
+ foo216()
+ foo217()
+ foo218()
+ foo219()
+ foo220()
+ foo221()
+ foo222()
+ foo223()
+ foo224()
+ foo225()
+ foo226()
+ foo227()
+ foo228()
+ foo229()
+ foo230()
+ foo231()
+ foo232()
+ foo233()
+ foo234()
+ foo235()
+ foo236()
+ foo237()
+ foo238()
+ foo239()
+ foo240()
+ foo241()
+ foo242()
+ foo243()
+ foo244()
+ foo245()
+ foo246()
+ foo247()
+ foo248()
+ foo249()
+ foo250()
+ foo251()
+ foo252()
+ foo253()
+ foo254()
+ foo255()
+ foo256()
+ foo257()
+ foo258()
+ foo259()
+ foo260()
+ foo261()
+ foo262()
+ foo263()
+ foo264()
+ foo265()
+ foo266()
+ foo267()
+ foo268()
+ foo269()
+ foo270()
+ foo271()
+ foo272()
+ foo273()
+ foo274()
+ foo275()
+ foo276()
+ foo277()
+ foo278()
+ foo279()
+ foo280()
+ foo281()
+ foo282()
+ foo283()
+ foo284()
+ foo285()
+ foo286()
+ foo287()
+ foo288()
+ foo289()
+ foo290()
+ foo291()
+ foo292()
+ foo293()
+ foo294()
+ foo295()
+ foo296()
+ foo297()
+ foo298()
+ foo299()
+ foo300()
+ foo301()
+ foo302()
+ foo303()
+ foo304()
+ foo305()
+ foo306()
+ foo307()
+ foo308()
+ foo309()
+ foo310()
+ foo311()
+ foo312()
+ foo313()
+ foo314()
+ foo315()
+ foo316()
+ foo317()
+ foo318()
+ foo319()
+ foo320()
+ foo321()
+ foo322()
+ foo323()
+ foo324()
+ foo325()
+ foo326()
+ foo327()
+ foo328()
+ foo329()
+ foo330()
+ foo331()
+ foo332()
+ foo333()
+ foo334()
+ foo335()
+ foo336()
+ foo337()
+ foo338()
+ foo339()
+ foo340()
+ foo341()
+ foo342()
+ foo343()
+ foo344()
+ foo345()
+ foo346()
+ foo347()
+ foo348()
+ foo349()
+ foo350()
+ foo351()
+ foo352()
+ foo353()
+ foo354()
+ foo355()
+ foo356()
+ foo357()
+ foo358()
+ foo359()
+ foo360()
+ foo361()
+ foo362()
+ foo363()
+ foo364()
+ foo365()
+ foo366()
+ foo367()
+ foo368()
+ foo369()
+ foo370()
+ foo371()
+ foo372()
+ foo373()
+ foo374()
+ foo375()
+ foo376()
+ foo377()
+ foo378()
+ foo379()
+ foo380()
+ foo381()
+ foo382()
+ foo383()
+ foo384()
+ foo385()
+ foo386()
+ foo387()
+ foo388()
+ foo389()
+ foo390()
+ foo391()
+ foo392()
+ foo393()
+ foo394()
+ foo395()
+ foo396()
+ foo397()
+ foo398()
+ foo399()
+ foo400()
+ foo401()
+ foo402()
+ foo403()
+ foo404()
+ foo405()
+ foo406()
+ foo407()
+ foo408()
+ foo409()
+ foo410()
+ foo411()
+ foo412()
+ foo413()
+ foo414()
+ foo415()
+ foo416()
+ foo417()
+ foo418()
+ foo419()
+ foo420()
+ foo421()
+ foo422()
+ foo423()
+ foo424()
+ foo425()
+ foo426()
+ foo427()
+ foo428()
+ foo429()
+ foo430()
+ foo431()
+ foo432()
+ foo433()
+ foo434()
+ foo435()
+ foo436()
+ foo437()
+ foo438()
+ foo439()
+ foo440()
+ foo441()
+ foo442()
+ foo443()
+ foo444()
+ foo445()
+ foo446()
+ foo447()
+ foo448()
+ foo449()
+ foo450()
+ foo451()
+ foo452()
+ foo453()
+ foo454()
+ foo455()
+ foo456()
+ foo457()
+ foo458()
+ foo459()
+ foo460()
+ foo461()
+ foo462()
+ foo463()
+ foo464()
+ foo465()
+ foo466()
+ foo467()
+ foo468()
+ foo469()
+ foo470()
+ foo471()
+ foo472()
+ foo473()
+ foo474()
+ foo475()
+ foo476()
+ foo477()
+ foo478()
+ foo479()
+ foo480()
+ foo481()
+ foo482()
+ foo483()
+ foo484()
+ foo485()
+ foo486()
+ foo487()
+ foo488()
+ foo489()
+ foo490()
+ foo491()
+ foo492()
+ foo493()
+ foo494()
+ foo495()
+ foo496()
+ foo497()
+ foo498()
+ foo499()
+ foo500()
+ foo501()
+ foo502()
+ foo503()
+ foo504()
+ foo505()
+ foo506()
+ foo507()
+ foo508()
+ foo509()
+ foo510()
+ foo511()
+}
+
+// Nil checks before calling interface methods.
+// They are needed only when the method offset is large.
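+// A load at a small offset from a nil itab faults on the unmapped zero page,
+// so the hardware trap doubles as the nil check; the explicit TESTB matched
+// below is only emitted once the method offset is too large for that to work.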
+
+func callMethodSmallOffset(i I) {
+ // amd64:-"TESTB"
+ i.foo001()
+}
+
+func callMethodLargeOffset(i I) {
+ // amd64:"TESTB"
+ i.foo511()
+}
diff --git a/test/codegen/issue25378.go b/test/codegen/issue25378.go
new file mode 100644
index 0000000..810a022
--- /dev/null
+++ b/test/codegen/issue25378.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var wsp = [256]bool{
+ ' ': true,
+ '\t': true,
+ '\n': true,
+ '\r': true,
+}
+
+func zeroExtArgByte(ch [2]byte) bool {
+ return wsp[ch[0]] // amd64:-"MOVBLZX\t..,.."
+}
+
+func zeroExtArgUint16(ch [2]uint16) bool {
+ return wsp[ch[0]] // amd64:-"MOVWLZX\t..,.."
+}
diff --git a/test/codegen/issue31618.go b/test/codegen/issue31618.go
new file mode 100644
index 0000000..8effe29
--- /dev/null
+++ b/test/codegen/issue31618.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Make sure we remove both inline marks in the following code.
+// Both +5 and +6 should map to real instructions, which can
+// be used as inline marks instead of explicit nops.
+func f(x int) int {
+ // amd64:-"XCHGL"
+ x = g(x) + 5
+ // amd64:-"XCHGL"
+ x = g(x) + 6
+ return x
+}
+
+func g(x int) int {
+ return x >> 3
+}
diff --git a/test/codegen/issue33580.go b/test/codegen/issue33580.go
new file mode 100644
index 0000000..1ded944
--- /dev/null
+++ b/test/codegen/issue33580.go
@@ -0,0 +1,25 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure we reuse large constant loads, if we can.
+// See issue 33580.
+
+package codegen
+
+const (
+ A = 7777777777777777
+ B = 8888888888888888
+)
+
+func f(x, y uint64) uint64 {
+ p := x & A
+ q := y & A
+ r := x & B
+ // amd64:-"MOVQ.*8888888888888888"
+ s := y & B
+
+ return p * q * r * s
+}
diff --git a/test/codegen/issue38554.go b/test/codegen/issue38554.go
new file mode 100644
index 0000000..84db847
--- /dev/null
+++ b/test/codegen/issue38554.go
@@ -0,0 +1,15 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that we are zeroing directly instead of
+// copying a large zero value. Issue 38554.
+
+package codegen
+
+func retlarge() [256]byte {
+ // amd64:-"DUFFCOPY"
+ return [256]byte{}
+}
diff --git a/test/codegen/issue42610.go b/test/codegen/issue42610.go
new file mode 100644
index 0000000..41d0e90
--- /dev/null
+++ b/test/codegen/issue42610.go
@@ -0,0 +1,28 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Don't allow 0 masks in shift lowering rules on ppc64x.
+// See issue 42610.
+
+package codegen
+
+func f32(a []int32, i uint32) {
+ g := func(p int32) int32 {
+ i = uint32(p) * (uint32(p) & (i & 1))
+ return 1
+ }
+ // ppc64x: -"RLWNIM"
+ a[0] = g(8) >> 1
+}
+
+func f(a []int, i uint) {
+ g := func(p int) int {
+ i = uint(p) * (uint(p) & (i & 1))
+ return 1
+ }
+ // ppc64x: -"RLDIC"
+ a[0] = g(8) >> 1
+}
diff --git a/test/codegen/issue48054.go b/test/codegen/issue48054.go
new file mode 100644
index 0000000..1f3a041
--- /dev/null
+++ b/test/codegen/issue48054.go
@@ -0,0 +1,31 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func a(n string) bool {
+ // arm64:"CBZ"
+ if len(n) > 0 {
+ return true
+ }
+ return false
+}
+
+func a2(n []int) bool {
+ // arm64:"CBZ"
+ if len(n) > 0 {
+ return true
+ }
+ return false
+}
+
+func a3(n []int) bool {
+ // amd64:"TESTQ"
+ if len(n) < 1 {
+ return true
+ }
+ return false
+}
diff --git a/test/codegen/issue52635.go b/test/codegen/issue52635.go
new file mode 100644
index 0000000..9b08cad
--- /dev/null
+++ b/test/codegen/issue52635.go
@@ -0,0 +1,41 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Test that optimized range memclr works with pointers to arrays.
+// The clears get inlined, see https://github.com/golang/go/issues/56997
+
+package codegen
+
+type T struct {
+ a *[10]int
+ b [10]int
+}
+
+func (t *T) f() {
+ // amd64:-".*runtime.memclrNoHeapPointers"
+ // amd64:"DUFFZERO"
+ for i := range t.a {
+ t.a[i] = 0
+ }
+
+ // amd64:-".*runtime.memclrNoHeapPointers"
+ // amd64:"DUFFZERO"
+ for i := range *t.a {
+ t.a[i] = 0
+ }
+
+ // amd64:-".*runtime.memclrNoHeapPointers"
+ // amd64:"DUFFZERO"
+ for i := range t.a {
+ (*t.a)[i] = 0
+ }
+
+ // amd64:-".*runtime.memclrNoHeapPointers"
+ // amd64:"DUFFZERO"
+ for i := range *t.a {
+ (*t.a)[i] = 0
+ }
+}
diff --git a/test/codegen/issue54467.go b/test/codegen/issue54467.go
new file mode 100644
index 0000000..d34b327
--- /dev/null
+++ b/test/codegen/issue54467.go
@@ -0,0 +1,59 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func f1(x *[4]int, y *[4]int) {
+ // amd64:".*memmove"
+ *x = *y
+}
+func f2(x *[4]int, y [4]int) {
+ // amd64:-".*memmove"
+ *x = y
+}
+func f3(x *[4]int, y *[4]int) {
+ // amd64:-".*memmove"
+ t := *y
+ // amd64:-".*memmove"
+ *x = t
+}
+func f4(x *[4]int, y [4]int) {
+ // amd64:-".*memmove"
+ t := y
+ // amd64:-".*memmove"
+ *x = t
+}
+
+type T struct {
+ a [4]int
+}
+
+func f5(x, y *T) {
+ // amd64:-".*memmove"
+ x.a = y.a
+}
+func f6(x *T, y T) {
+ // amd64:-".*memmove"
+ x.a = y.a
+}
+func f7(x *T, y *[4]int) {
+ // amd64:-".*memmove"
+ x.a = *y
+}
+func f8(x *[4]int, y *T) {
+ // amd64:-".*memmove"
+ *x = y.a
+}
+
+func f9(x [][4]int, y [][4]int, i, j int) {
+ // amd64:-".*memmove"
+ x[i] = y[j]
+}
+
+func f10() []byte {
+ // amd64:-".*memmove"
+ return []byte("aReasonablyBigTestString")
+}
diff --git a/test/codegen/issue56440.go b/test/codegen/issue56440.go
new file mode 100644
index 0000000..c6c1e66
--- /dev/null
+++ b/test/codegen/issue56440.go
@@ -0,0 +1,34 @@
+// asmcheck
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Check that we recognize when the length of an append is constant,
+// by making sure that the constant length is folded into a load offset.
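+// In both functions below len(s) is the constant 5 after the append, so
+// x[len(s)] becomes a load at the fixed offset 5*8 = 40 bytes.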
+
+package p
+
+func f(x []int) int {
+ s := make([]int, 3)
+ s = append(s, 4, 5)
+ // amd64:`MOVQ\t40\(.*\),`
+ return x[len(s)]
+}
+
+func g(x []int, p *bool) int {
+ s := make([]int, 3)
+ for {
+ s = s[:3]
+ if cap(s) < 5 {
+ s = make([]int, 3, 5)
+ }
+ s = append(s, 4, 5)
+ if *p {
+ // amd64:`MOVQ\t40\(.*\),`
+ return x[len(s)]
+ }
+ }
+ return 0
+}
diff --git a/test/codegen/issue58166.go b/test/codegen/issue58166.go
new file mode 100644
index 0000000..8be5aac
--- /dev/null
+++ b/test/codegen/issue58166.go
@@ -0,0 +1,23 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
+ for i := 0; i < m; i++ {
+ ctmp := c[i*ldc : i*ldc+n]
+ for l, v := range a[i*lda : i*lda+k] {
+ tmp := alpha * v
+ if tmp != 0 {
+ x := b[l*ldb : l*ldb+n]
+ // amd64:"INCQ"
+ for i, v := range x {
+ ctmp[i] += tmp * v
+ }
+ }
+ }
+ }
+}
diff --git a/test/codegen/issue60324.go b/test/codegen/issue60324.go
new file mode 100644
index 0000000..d106e7e
--- /dev/null
+++ b/test/codegen/issue60324.go
@@ -0,0 +1,36 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func main() {
+ // amd64:"LEAQ\tcommand-line-arguments\\.main\\.f\\.g\\.h\\.func3"
+ f(1)()
+
+ // amd64:"LEAQ\tcommand-line-arguments\\.main\\.g\\.h\\.func2"
+ g(2)()
+
+ // amd64:"LEAQ\tcommand-line-arguments\\.main\\.h\\.func1"
+ h(3)()
+
+ // amd64:"LEAQ\tcommand-line-arguments\\.main\\.f\\.g\\.h\\.func4"
+ f(4)()
+}
+
+func f(x int) func() {
+ // amd64:"LEAQ\tcommand-line-arguments\\.f\\.g\\.h\\.func1"
+ return g(x)
+}
+
+func g(x int) func() {
+ // amd64:"LEAQ\tcommand-line-arguments\\.g\\.h\\.func1"
+ return h(x)
+}
+
+func h(x int) func() {
+ // amd64:"LEAQ\tcommand-line-arguments\\.h\\.func1"
+ return func() { recover() }
+}
diff --git a/test/codegen/issue60673.go b/test/codegen/issue60673.go
new file mode 100644
index 0000000..2df031a
--- /dev/null
+++ b/test/codegen/issue60673.go
@@ -0,0 +1,18 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+//go:noinline
+func f(x int32) {
+}
+
+func g(p *int32) {
+ // argument marshaling code should live at line 17, not line 15.
+ x := *p
+ // 386: `MOVL\s[A-Z]+,\s\(SP\)`
+ f(x)
+}
diff --git a/test/codegen/issue61356.go b/test/codegen/issue61356.go
new file mode 100644
index 0000000..65753d5
--- /dev/null
+++ b/test/codegen/issue61356.go
@@ -0,0 +1,55 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure this code doesn't generate spill/restore.
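+// Any SP-relative memory operand in the generated code would mean that an
+// intermediate value was spilled to the stack, which the -`.*SP.*` checks
+// below rule out.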
+
+package codegen
+
+func pack20(in *[20]uint64) uint64 {
+ var out uint64
+ out |= 4
+ // amd64:-`.*SP.*`
+ out |= in[0] << 4
+ // amd64:-`.*SP.*`
+ out |= in[1] << 7
+ // amd64:-`.*SP.*`
+ out |= in[2] << 10
+ // amd64:-`.*SP.*`
+ out |= in[3] << 13
+ // amd64:-`.*SP.*`
+ out |= in[4] << 16
+ // amd64:-`.*SP.*`
+ out |= in[5] << 19
+ // amd64:-`.*SP.*`
+ out |= in[6] << 22
+ // amd64:-`.*SP.*`
+ out |= in[7] << 25
+ // amd64:-`.*SP.*`
+ out |= in[8] << 28
+ // amd64:-`.*SP.*`
+ out |= in[9] << 31
+ // amd64:-`.*SP.*`
+ out |= in[10] << 34
+ // amd64:-`.*SP.*`
+ out |= in[11] << 37
+ // amd64:-`.*SP.*`
+ out |= in[12] << 40
+ // amd64:-`.*SP.*`
+ out |= in[13] << 43
+ // amd64:-`.*SP.*`
+ out |= in[14] << 46
+ // amd64:-`.*SP.*`
+ out |= in[15] << 49
+ // amd64:-`.*SP.*`
+ out |= in[16] << 52
+ // amd64:-`.*SP.*`
+ out |= in[17] << 55
+ // amd64:-`.*SP.*`
+ out |= in[18] << 58
+ // amd64:-`.*SP.*`
+ out |= in[19] << 61
+ return out
+}
diff --git a/test/codegen/issue63332.go b/test/codegen/issue63332.go
new file mode 100644
index 0000000..dbe671d
--- /dev/null
+++ b/test/codegen/issue63332.go
@@ -0,0 +1,14 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func issue63332(c chan int) {
+ x := 0
+ // amd64:-`MOVQ`
+ x += 2
+ c <- x
+}
diff --git a/test/codegen/logic.go b/test/codegen/logic.go
new file mode 100644
index 0000000..ac33f91
--- /dev/null
+++ b/test/codegen/logic.go
@@ -0,0 +1,41 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Test to make sure that (CMPQ (ANDQ x y) [0]) does not get rewritten to
+// (TESTQ x y) if the ANDQ has other uses. If that rewrite happens, then one
+// of the args of the ANDQ needs to be saved so it can be used as the arg to TESTQ.
+func andWithUse(x, y int) int {
+ z := x & y
+ // amd64:`TESTQ\s(AX, AX|BX, BX|CX, CX|DX, DX|SI, SI|DI, DI|R8, R8|R9, R9|R10, R10|R11, R11|R12, R12|R13, R13|R15, R15)`
+ if z == 0 {
+ return 77
+ }
+ // use z by returning it
+ return z
+}
+
+// Verify (OR x (NOT y)) rewrites to (ORN x y) where supported
+func ornot(x, y int) int {
+ // ppc64x:"ORN"
+ z := x | ^y
+ return z
+}
+
+// Verify that (OR (NOT x) (NOT y)) rewrites to (NOT (AND x y))
+func orDemorgans(x, y int) int {
+ // amd64:"AND",-"OR"
+ z := ^x | ^y
+ return z
+}
+
+// Verify that (AND (NOT x) (NOT y)) rewrites to (NOT (OR x y))
+func andDemorgans(x, y int) int {
+ // amd64:"OR",-"AND"
+ z := ^x & ^y
+ return z
+}
diff --git a/test/codegen/mapaccess.go b/test/codegen/mapaccess.go
new file mode 100644
index 0000000..3d494e7
--- /dev/null
+++ b/test/codegen/mapaccess.go
@@ -0,0 +1,484 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// These tests check that mapaccess calls are not used.
+// Issues #23661 and #24364.
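+// For m[k] op= x and m[k] = append(m[k], ...) the compiler can obtain the
+// element slot once via mapassign and update it in place, so no
+// runtime.mapaccess call should appear. The exceptions below use different
+// keys or extra assignments and still need the separate read.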
+
+func mapCompoundAssignmentInt8() {
+ m := make(map[int8]int8, 0)
+ var k int8 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] += 67
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] -= 123
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] *= 45
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] |= 78
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] ^= 89
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] <<= 9
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] >>= 10
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]++
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]--
+}
+
+func mapCompoundAssignmentInt32() {
+ m := make(map[int32]int32, 0)
+ var k int32 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] += 67890
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] -= 123
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] *= 456
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] |= 78
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] ^= 89
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] <<= 9
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] >>= 10
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]++
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]--
+}
+
+func mapCompoundAssignmentInt64() {
+ m := make(map[int64]int64, 0)
+ var k int64 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] += 67890
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] -= 123
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] *= 456
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] |= 78
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] ^= 89
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] <<= 9
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] >>= 10
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]++
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]--
+}
+
+func mapCompoundAssignmentComplex128() {
+ m := make(map[complex128]complex128, 0)
+ var k complex128 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] += 67890
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] -= 123
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] *= 456
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]++
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k]--
+}
+
+func mapCompoundAssignmentString() {
+ m := make(map[string]string, 0)
+ var k string = "key"
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] += "value"
+}
+
+var sinkAppend bool
+
+func mapAppendAssignmentInt8() {
+ m := make(map[int8][]int8, 0)
+ var k int8 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1, 2, 3)
+
+ a := []int8{7, 8, 9, 0}
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], a...)
+
+ // Exceptions
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(a, m[k]...)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentInt32() {
+ m := make(map[int32][]int32, 0)
+ var k int32 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1, 2, 3)
+
+ a := []int32{7, 8, 9, 0}
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], a...)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k+1] = append(m[k+1], a...)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[-k] = append(m[-k], a...)
+
+ // Exceptions
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(a, m[k]...)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentInt64() {
+ m := make(map[int64][]int64, 0)
+ var k int64 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1, 2, 3)
+
+ a := []int64{7, 8, 9, 0}
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], a...)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k+1] = append(m[k+1], a...)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[-k] = append(m[-k], a...)
+
+ // Exceptions
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(a, m[k]...)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentComplex128() {
+ m := make(map[complex128][]complex128, 0)
+ var k complex128 = 0
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1)
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], 1, 2, 3)
+
+ a := []complex128{7, 8, 9, 0}
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], a...)
+
+ // Exceptions
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(a, m[k]...)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ sinkAppend, m[k] = !sinkAppend, append(m[k], 99)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(m[k+1], 100)
+}
+
+func mapAppendAssignmentString() {
+ m := make(map[string][]string, 0)
+ var k string = "key"
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], "1")
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], "1", "2", "3")
+
+ a := []string{"7", "8", "9", "0"}
+
+ // 386:-".*mapaccess"
+ // amd64:-".*mapaccess"
+ // arm:-".*mapaccess"
+ // arm64:-".*mapaccess"
+ m[k] = append(m[k], a...)
+
+ // Exceptions
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(a, m[k]...)
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ sinkAppend, m[k] = !sinkAppend, append(m[k], "99")
+
+ // 386:".*mapaccess"
+ // amd64:".*mapaccess"
+ // arm:".*mapaccess"
+ // arm64:".*mapaccess"
+ m[k] = append(m[k+"1"], "100")
+}
diff --git a/test/codegen/maps.go b/test/codegen/maps.go
new file mode 100644
index 0000000..2550579
--- /dev/null
+++ b/test/codegen/maps.go
@@ -0,0 +1,201 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// map types.
+
+// ------------------- //
+// Access Const //
+// ------------------- //
+
+// Direct use of constants in fast map access calls (Issue #19015).
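+// The constant key should be materialized directly as an immediate
+// (MOV[LQ] $5) for the fast map access call, rather than first being
+// written to a temporary.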
+
+func AccessInt1(m map[int]int) int {
+ // amd64:"MOV[LQ]\t[$]5"
+ return m[5]
+}
+
+func AccessInt2(m map[int]int) bool {
+ // amd64:"MOV[LQ]\t[$]5"
+ _, ok := m[5]
+ return ok
+}
+
+func AccessString1(m map[string]int) int {
+ // amd64:`.*"abc"`
+ return m["abc"]
+}
+
+func AccessString2(m map[string]int) bool {
+ // amd64:`.*"abc"`
+ _, ok := m["abc"]
+ return ok
+}
+
+// ------------------- //
+// String Conversion //
+// ------------------- //
+
+func LookupStringConversionSimple(m map[string]int, bytes []byte) int {
+ // amd64:-`.*runtime\.slicebytetostring\(`
+ return m[string(bytes)]
+}
+
+func LookupStringConversionStructLit(m map[struct{ string }]int, bytes []byte) int {
+ // amd64:-`.*runtime\.slicebytetostring\(`
+ return m[struct{ string }{string(bytes)}]
+}
+
+func LookupStringConversionArrayLit(m map[[2]string]int, bytes []byte) int {
+ // amd64:-`.*runtime\.slicebytetostring\(`
+ return m[[2]string{string(bytes), string(bytes)}]
+}
+
+func LookupStringConversionNestedLit(m map[[1]struct{ s [1]string }]int, bytes []byte) int {
+ // amd64:-`.*runtime\.slicebytetostring\(`
+ return m[[1]struct{ s [1]string }{struct{ s [1]string }{s: [1]string{string(bytes)}}}]
+}
+
+func LookupStringConversionKeyedArrayLit(m map[[2]string]int, bytes []byte) int {
+ // amd64:-`.*runtime\.slicebytetostring\(`
+ return m[[2]string{0: string(bytes)}]
+}
+
+// ------------------- //
+// Map Clear //
+// ------------------- //
+
+// Optimization of map clear idiom (Issue #20138).
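+// A loop of the form `for k := range m { delete(m, k) }` with no other work
+// in the body is compiled to a single runtime.mapclear call, provided the key
+// type has reflexive equality and the loop variable is not used afterwards.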
+
+func MapClearReflexive(m map[int]int) {
+ // amd64:`.*runtime\.mapclear`
+ // amd64:-`.*runtime\.mapiterinit`
+ for k := range m {
+ delete(m, k)
+ }
+}
+
+func MapClearIndirect(m map[int]int) {
+ s := struct{ m map[int]int }{m: m}
+ // amd64:`.*runtime\.mapclear`
+ // amd64:-`.*runtime\.mapiterinit`
+ for k := range s.m {
+ delete(s.m, k)
+ }
+}
+
+func MapClearPointer(m map[*byte]int) {
+ // amd64:`.*runtime\.mapclear`
+ // amd64:-`.*runtime\.mapiterinit`
+ for k := range m {
+ delete(m, k)
+ }
+}
+
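+// float64 keys are not reflexive (NaN != NaN), so the fast clear cannot be
+// used here.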
+func MapClearNotReflexive(m map[float64]int) {
+ // amd64:`.*runtime\.mapiterinit`
+ // amd64:-`.*runtime\.mapclear`
+ for k := range m {
+ delete(m, k)
+ }
+}
+
+func MapClearInterface(m map[interface{}]int) {
+ // amd64:`.*runtime\.mapiterinit`
+ // amd64:-`.*runtime\.mapclear`
+ for k := range m {
+ delete(m, k)
+ }
+}
+
+func MapClearSideEffect(m map[int]int) int {
+ k := 0
+ // amd64:`.*runtime\.mapiterinit`
+ // amd64:-`.*runtime\.mapclear`
+ for k = range m {
+ delete(m, k)
+ }
+ return k
+}
+
+func MapLiteralSizing(x int) (map[int]int, map[int]int) {
+ // This is tested for internal/abi/maps.go:MapBucketCountBits={3,4,5}
+ // amd64:"MOVL\t[$]33,"
+ m := map[int]int{
+ 0: 0,
+ 1: 1,
+ 2: 2,
+ 3: 3,
+ 4: 4,
+ 5: 5,
+ 6: 6,
+ 7: 7,
+ 8: 8,
+ 9: 9,
+ 10: 10,
+ 11: 11,
+ 12: 12,
+ 13: 13,
+ 14: 14,
+ 15: 15,
+ 16: 16,
+ 17: 17,
+ 18: 18,
+ 19: 19,
+ 20: 20,
+ 21: 21,
+ 22: 22,
+ 23: 23,
+ 24: 24,
+ 25: 25,
+ 26: 26,
+ 27: 27,
+ 28: 28,
+ 29: 29,
+ 30: 30,
+ 31: 32,
+ 32: 32,
+ }
+ // amd64:"MOVL\t[$]33,"
+ n := map[int]int{
+ 0: 0,
+ 1: 1,
+ 2: 2,
+ 3: 3,
+ 4: 4,
+ 5: 5,
+ 6: 6,
+ 7: 7,
+ 8: 8,
+ 9: 9,
+ 10: 10,
+ 11: 11,
+ 12: 12,
+ 13: 13,
+ 14: 14,
+ 15: 15,
+ 16: 16,
+ 17: 17,
+ 18: 18,
+ 19: 19,
+ 20: 20,
+ 21: 21,
+ 22: 22,
+ 23: 23,
+ 24: 24,
+ 25: 25,
+ 26: 26,
+ 27: 27,
+ 28: 28,
+ 29: 29,
+ 30: 30,
+ 31: 32,
+ 32: 32,
+ }
+ return m, n
+}
diff --git a/test/codegen/math.go b/test/codegen/math.go
new file mode 100644
index 0000000..331ebbe
--- /dev/null
+++ b/test/codegen/math.go
@@ -0,0 +1,253 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math"
+
+var sink64 [8]float64
+
+func approx(x float64) {
+ // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+ // amd64:"ROUNDSD\t[$]2"
+ // s390x:"FIDBR\t[$]6"
+ // arm64:"FRINTPD"
+ // ppc64x:"FRIP"
+ // wasm:"F64Ceil"
+ sink64[0] = math.Ceil(x)
+
+ // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+ // amd64:"ROUNDSD\t[$]1"
+ // s390x:"FIDBR\t[$]7"
+ // arm64:"FRINTMD"
+ // ppc64x:"FRIM"
+ // wasm:"F64Floor"
+ sink64[1] = math.Floor(x)
+
+ // s390x:"FIDBR\t[$]1"
+ // arm64:"FRINTAD"
+ // ppc64x:"FRIN"
+ sink64[2] = math.Round(x)
+
+ // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+ // amd64:"ROUNDSD\t[$]3"
+ // s390x:"FIDBR\t[$]5"
+ // arm64:"FRINTZD"
+ // ppc64x:"FRIZ"
+ // wasm:"F64Trunc"
+ sink64[3] = math.Trunc(x)
+
+ // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41"
+ // amd64:"ROUNDSD\t[$]0"
+ // s390x:"FIDBR\t[$]4"
+ // arm64:"FRINTND"
+ // wasm:"F64Nearest"
+ sink64[4] = math.RoundToEven(x)
+}
+
+func sqrt(x float64) float64 {
+ // amd64:"SQRTSD"
+ // 386/sse2:"SQRTSD" 386/softfloat:-"SQRTD"
+ // arm64:"FSQRTD"
+ // arm/7:"SQRTD"
+ // mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD"
+ // mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD"
+ // wasm:"F64Sqrt"
+ // ppc64x:"FSQRT"
+ // riscv64: "FSQRTD"
+ return math.Sqrt(x)
+}
+
+func sqrt32(x float32) float32 {
+ // amd64:"SQRTSS"
+ // 386/sse2:"SQRTSS" 386/softfloat:-"SQRTS"
+ // arm64:"FSQRTS"
+ // arm/7:"SQRTF"
+ // mips/hardfloat:"SQRTF" mips/softfloat:-"SQRTF"
+ // mips64/hardfloat:"SQRTF" mips64/softfloat:-"SQRTF"
+ // wasm:"F32Sqrt"
+ // ppc64x:"FSQRTS"
+ // riscv64: "FSQRTS"
+ return float32(math.Sqrt(float64(x)))
+}
+
+// Check that it's using integer registers
+func abs(x, y float64) {
+ // amd64:"BTRQ\t[$]63"
+ // arm64:"FABSD\t"
+ // s390x:"LPDFR\t",-"MOVD\t" (no integer load/store)
+ // ppc64x:"FABS\t"
+ // riscv64:"FABSD\t"
+ // wasm:"F64Abs"
+ // arm/6:"ABSD\t"
+ // mips64/hardfloat:"ABSD\t"
+ // mips/hardfloat:"ABSD\t"
+ sink64[0] = math.Abs(x)
+
+ // amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
+ // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
+ // ppc64x:"FNABS\t"
+ sink64[1] = -math.Abs(y)
+}
+
+// Check that it's using integer registers
+func abs32(x float32) float32 {
+ // s390x:"LPDFR",-"LDEBR",-"LEDBR" (no float64 conversion)
+ return float32(math.Abs(float64(x)))
+}
+
+// Check that it's using integer registers
+func copysign(a, b, c float64) {
+ // amd64:"BTRQ\t[$]63","ANDQ","ORQ"
+ // s390x:"CPSDR",-"MOVD" (no integer load/store)
+ // ppc64x:"FCPSGN"
+ // riscv64:"FSGNJD"
+ // wasm:"F64Copysign"
+ sink64[0] = math.Copysign(a, b)
+
+ // amd64:"BTSQ\t[$]63"
+ // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
+ // ppc64x:"FCPSGN"
+ // riscv64:"FSGNJD"
+ // arm64:"ORR", -"AND"
+ sink64[1] = math.Copysign(c, -1)
+
+ // Like math.Copysign(c, -1), but with integer operations. Useful
+ // for platforms that have a copysign opcode to see if it's detected.
+ // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
+ sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63)
+
+ // amd64:"ANDQ","ORQ"
+ // s390x:"CPSDR\t",-"MOVD\t" (no integer load/store)
+ // ppc64x:"FCPSGN"
+ // riscv64:"FSGNJD"
+ sink64[3] = math.Copysign(-1, c)
+}
+
+func fma(x, y, z float64) float64 {
+ // amd64/v3:-".*x86HasFMA"
+ // amd64:"VFMADD231SD"
+ // arm/6:"FMULAD"
+ // arm64:"FMADDD"
+ // s390x:"FMADD"
+ // ppc64x:"FMADD"
+ // riscv64:"FMADDD"
+ return math.FMA(x, y, z)
+}
+
+func fms(x, y, z float64) float64 {
+ // riscv64:"FMSUBD"
+ return math.FMA(x, y, -z)
+}
+
+func fnms(x, y, z float64) float64 {
+ // riscv64:"FNMSUBD",-"FNMADDD"
+ return math.FMA(-x, y, z)
+}
+
+func fnma(x, y, z float64) float64 {
+ // riscv64:"FNMADDD",-"FNMSUBD"
+ return math.FMA(x, -y, -z)
+}
+
+func fromFloat64(f64 float64) uint64 {
+ // amd64:"MOVQ\tX.*, [^X].*"
+ // arm64:"FMOVD\tF.*, R.*"
+ // ppc64x:"MFVSRD"
+ // mips64/hardfloat:"MOVV\tF.*, R.*"
+ return math.Float64bits(f64+1) + 1
+}
+
+func fromFloat32(f32 float32) uint32 {
+ // amd64:"MOVL\tX.*, [^X].*"
+ // arm64:"FMOVS\tF.*, R.*"
+ // mips64/hardfloat:"MOVW\tF.*, R.*"
+ return math.Float32bits(f32+1) + 1
+}
+
+func toFloat64(u64 uint64) float64 {
+ // amd64:"MOVQ\t[^X].*, X.*"
+ // arm64:"FMOVD\tR.*, F.*"
+ // ppc64x:"MTVSRD"
+ // mips64/hardfloat:"MOVV\tR.*, F.*"
+ return math.Float64frombits(u64+1) + 1
+}
+
+func toFloat32(u32 uint32) float32 {
+ // amd64:"MOVL\t[^X].*, X.*"
+ // arm64:"FMOVS\tR.*, F.*"
+ // mips64/hardfloat:"MOVW\tR.*, F.*"
+ return math.Float32frombits(u32+1) + 1
+}
+
+// Test that comparisons with constants converted to float
+// are evaluated at compile-time
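+// Every comparison below has only constant operands, so each function should
+// reduce to returning a constant boolean with no floating-point compare left
+// at run time.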
+
+func constantCheck64() bool {
+ // amd64:"(MOVB\t[$]0)|(XORL\t[A-Z][A-Z0-9]+, [A-Z][A-Z0-9]+)",-"FCMP",-"MOVB\t[$]1"
+ // s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1,"
+ return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63))
+}
+
+func constantCheck32() bool {
+ // amd64:"MOV(B|L)\t[$]1",-"FCMP",-"MOV(B|L)\t[$]0"
+ // s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0,"
+ return float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31))
+}
+
+// Test that integer constants are converted to floating point constants
+// at compile-time
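+// math.Float32frombits(0x3f800000) is the constant 1.0, so the comparison in
+// constantConvert32 should use an ordinary float constant load with no
+// run-time conversion, while constantConvertInt32 compares integers and
+// should involve no float loads at all.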
+
+func constantConvert32(x float32) float32 {
+ // amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)"
+ // s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+ // ppc64x:"FMOVS\t[$]f32.3f800000\\(SB\\)"
+ // arm64:"FMOVS\t[$]\\(1.0\\)"
+ if x > math.Float32frombits(0x3f800000) {
+ return -x
+ }
+ return x
+}
+
+func constantConvertInt32(x uint32) uint32 {
+ // amd64:-"MOVSS"
+ // s390x:-"FMOVS"
+ // ppc64x:-"FMOVS"
+ // arm64:-"FMOVS"
+ if x > math.Float32bits(1) {
+ return -x
+ }
+ return x
+}
+
+func nanGenerate64() float64 {
+ // Test to make sure we don't generate a NaN while constant propagating.
+ // See issue 36400.
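+ // 1/zero folds to +Inf and may be constant propagated, but 0/0, 0*Inf and
+ // Sqrt(-1) are NaN and must stay as run-time operations, hence the
+ // DIVSD/MULSD/SQRTSD checks.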
+ zero := 0.0
+ // amd64:-"DIVSD"
+ inf := 1 / zero // +inf. We can constant propagate this one.
+ negone := -1.0
+
+ // amd64:"DIVSD"
+ z0 := zero / zero
+ // amd64:"MULSD"
+ z1 := zero * inf
+ // amd64:"SQRTSD"
+ z2 := math.Sqrt(negone)
+ return z0 + z1 + z2
+}
+
+func nanGenerate32() float32 {
+ zero := float32(0.0)
+ // amd64:-"DIVSS"
+ inf := 1 / zero // +inf. We can constant propagate this one.
+
+ // amd64:"DIVSS"
+ z0 := zero / zero
+ // amd64:"MULSS"
+ z1 := zero * inf
+ return z0 + z1
+}
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
new file mode 100644
index 0000000..184d608
--- /dev/null
+++ b/test/codegen/mathbits.go
@@ -0,0 +1,869 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+// ----------------------- //
+// bits.LeadingZeros //
+// ----------------------- //
+
+func LeadingZeros(n uint) int {
+ // amd64/v1,amd64/v2:"BSRQ"
+ // amd64/v3:"LZCNTQ", -"BSRQ"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"CNTLZD"
+ return bits.LeadingZeros(n)
+}
+
+func LeadingZeros64(n uint64) int {
+ // amd64/v1,amd64/v2:"BSRQ"
+ // amd64/v3:"LZCNTQ", -"BSRQ"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"CNTLZD"
+ return bits.LeadingZeros64(n)
+}
+
+func LeadingZeros32(n uint32) int {
+ // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
+ // amd64/v3: "LZCNTL",- "BSRL"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZW"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"CNTLZW"
+ return bits.LeadingZeros32(n)
+}
+
+func LeadingZeros16(n uint16) int {
+ // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+ // amd64/v3: "LZCNTL",- "BSRL"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"CNTLZD"
+ return bits.LeadingZeros16(n)
+}
+
+func LeadingZeros8(n uint8) int {
+ // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+ // amd64/v3: "LZCNTL",- "BSRL"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"CNTLZD"
+ return bits.LeadingZeros8(n)
+}
+
+// --------------- //
+// bits.Len* //
+// --------------- //
+
+func Len(n uint) int {
+ // amd64/v1,amd64/v2:"BSRQ"
+ // amd64/v3: "LZCNTQ"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"SUBC","CNTLZD"
+ return bits.Len(n)
+}
+
+func Len64(n uint64) int {
+ // amd64/v1,amd64/v2:"BSRQ"
+ // amd64/v3: "LZCNTQ"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"SUBC","CNTLZD"
+ return bits.Len64(n)
+}
+
+func SubFromLen64(n uint64) int {
+ // ppc64x:"CNTLZD",-"SUBC"
+ return 64 - bits.Len64(n)
+}
+
+func Len32(n uint32) int {
+ // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
+ // amd64/v3: "LZCNTL"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x: "CNTLZW"
+ return bits.Len32(n)
+}
+
+func Len16(n uint16) int {
+ // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+ // amd64/v3: "LZCNTL"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"SUBC","CNTLZD"
+ return bits.Len16(n)
+}
+
+func Len8(n uint8) int {
+ // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ"
+ // amd64/v3: "LZCNTL"
+ // s390x:"FLOGR"
+ // arm:"CLZ" arm64:"CLZ"
+ // mips:"CLZ"
+ // wasm:"I64Clz"
+ // ppc64x:"SUBC","CNTLZD"
+ return bits.Len8(n)
+}
+
+// -------------------- //
+// bits.OnesCount //
+// -------------------- //
+
+// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested.
+func OnesCount(n uint) int {
+ // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+ // amd64:"POPCNTQ"
+ // arm64:"VCNT","VUADDLV"
+ // s390x:"POPCNT"
+ // ppc64x:"POPCNTD"
+ // wasm:"I64Popcnt"
+ return bits.OnesCount(n)
+}
+
+func OnesCount64(n uint64) int {
+ // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+ // amd64:"POPCNTQ"
+ // arm64:"VCNT","VUADDLV"
+ // s390x:"POPCNT"
+ // ppc64x:"POPCNTD"
+ // wasm:"I64Popcnt"
+ return bits.OnesCount64(n)
+}
+
+func OnesCount32(n uint32) int {
+ // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+ // amd64:"POPCNTL"
+ // arm64:"VCNT","VUADDLV"
+ // s390x:"POPCNT"
+ // ppc64x:"POPCNTW"
+ // wasm:"I64Popcnt"
+ return bits.OnesCount32(n)
+}
+
+func OnesCount16(n uint16) int {
+ // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
+ // amd64:"POPCNTL"
+ // arm64:"VCNT","VUADDLV"
+ // s390x:"POPCNT"
+ // ppc64x:"POPCNTW"
+ // wasm:"I64Popcnt"
+ return bits.OnesCount16(n)
+}
+
+func OnesCount8(n uint8) int {
+ // s390x:"POPCNT"
+ // ppc64x:"POPCNTB"
+ // wasm:"I64Popcnt"
+ return bits.OnesCount8(n)
+}
+
+// ----------------------- //
+// bits.ReverseBytes //
+// ----------------------- //
+
+func ReverseBytes(n uint) uint {
+ // amd64:"BSWAPQ"
+ // 386:"BSWAPL"
+ // s390x:"MOVDBR"
+ // arm64:"REV"
+ return bits.ReverseBytes(n)
+}
+
+func ReverseBytes64(n uint64) uint64 {
+ // amd64:"BSWAPQ"
+ // 386:"BSWAPL"
+ // s390x:"MOVDBR"
+ // arm64:"REV"
+ // ppc64x/power10: "BRD"
+ return bits.ReverseBytes64(n)
+}
+
+func ReverseBytes32(n uint32) uint32 {
+ // amd64:"BSWAPL"
+ // 386:"BSWAPL"
+ // s390x:"MOVWBR"
+ // arm64:"REVW"
+ // ppc64x/power10: "BRW"
+ return bits.ReverseBytes32(n)
+}
+
+func ReverseBytes16(n uint16) uint16 {
+ // amd64:"ROLW"
+ // arm64:"REV16W",-"UBFX",-"ORR"
+ // arm/5:"SLL","SRL","ORR"
+ // arm/6:"REV16"
+ // arm/7:"REV16"
+ // ppc64x/power10: "BRH"
+ return bits.ReverseBytes16(n)
+}
+
+// --------------------- //
+// bits.RotateLeft //
+// --------------------- //
+
+func RotateLeft64(n uint64) uint64 {
+ // amd64:"ROLQ"
+ // arm64:"ROR"
+ // ppc64x:"ROTL"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
+ // wasm:"I64Rotl"
+ return bits.RotateLeft64(n, 37)
+}
+
+func RotateLeft32(n uint32) uint32 {
+ // amd64:"ROLL" 386:"ROLL"
+ // arm:`MOVW\tR[0-9]+@>23`
+ // arm64:"RORW"
+ // ppc64x:"ROTLW"
+ // s390x:"RLL"
+ // wasm:"I32Rotl"
+ return bits.RotateLeft32(n, 9)
+}
+
+func RotateLeft16(n uint16, s int) uint16 {
+ // amd64:"ROLW" 386:"ROLW"
+ // arm64:"RORW",-"CSEL"
+ return bits.RotateLeft16(n, s)
+}
+
+func RotateLeft8(n uint8, s int) uint8 {
+ // amd64:"ROLB" 386:"ROLB"
+ // arm64:"LSL","LSR",-"CSEL"
+ return bits.RotateLeft8(n, s)
+}
+
+func RotateLeftVariable(n uint, m int) uint {
+ // amd64:"ROLQ"
+ // arm64:"ROR"
+ // ppc64x:"ROTL"
+ // s390x:"RLLG"
+ // wasm:"I64Rotl"
+ return bits.RotateLeft(n, m)
+}
+
+func RotateLeftVariable64(n uint64, m int) uint64 {
+ // amd64:"ROLQ"
+ // arm64:"ROR"
+ // ppc64x:"ROTL"
+ // s390x:"RLLG"
+ // wasm:"I64Rotl"
+ return bits.RotateLeft64(n, m)
+}
+
+func RotateLeftVariable32(n uint32, m int) uint32 {
+ // arm:`MOVW\tR[0-9]+@>R[0-9]+`
+ // amd64:"ROLL"
+ // arm64:"RORW"
+ // ppc64x:"ROTLW"
+ // s390x:"RLL"
+ // wasm:"I32Rotl"
+ return bits.RotateLeft32(n, m)
+}
+
+// ------------------------ //
+// bits.TrailingZeros //
+// ------------------------ //
+
+func TrailingZeros(n uint) int {
+ // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+ // amd64/v3:"TZCNTQ"
+ // 386:"BSFL"
+ // arm:"CLZ"
+ // arm64:"RBIT","CLZ"
+ // s390x:"FLOGR"
+ // ppc64x/power8:"ANDN","POPCNTD"
+ // ppc64x/power9: "CNTTZD"
+ // wasm:"I64Ctz"
+ return bits.TrailingZeros(n)
+}
+
+func TrailingZeros64(n uint64) int {
+ // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+ // amd64/v3:"TZCNTQ"
+ // 386:"BSFL"
+ // arm64:"RBIT","CLZ"
+ // s390x:"FLOGR"
+ // ppc64x/power8:"ANDN","POPCNTD"
+ // ppc64x/power9: "CNTTZD"
+ // wasm:"I64Ctz"
+ return bits.TrailingZeros64(n)
+}
+
+func TrailingZeros64Subtract(n uint64) int {
+ // ppc64x/power8:"NEG","SUBC","ANDN","POPCNTD"
+ // ppc64x/power9:"SUBC","CNTTZD"
+ return bits.TrailingZeros64(1 - n)
+}
+
+func TrailingZeros32(n uint32) int {
+ // amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
+ // amd64/v3:"TZCNTL"
+ // 386:"BSFL"
+ // arm:"CLZ"
+ // arm64:"RBITW","CLZW"
+ // s390x:"FLOGR","MOVWZ"
+ // ppc64x/power8:"ANDN","POPCNTW"
+ // ppc64x/power9: "CNTTZW"
+ // wasm:"I64Ctz"
+ return bits.TrailingZeros32(n)
+}
+
+func TrailingZeros16(n uint16) int {
+ // amd64:"BSFL","ORL\\t\\$65536"
+ // 386:"BSFL\t"
+ // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
+ // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
+ // s390x:"FLOGR","OR\t\\$65536"
+ // ppc64x/power8:"POPCNTD","ORIS\\t\\$1"
+ // ppc64x/power9:"CNTTZD","ORIS\\t\\$1"
+ // wasm:"I64Ctz"
+ return bits.TrailingZeros16(n)
+}
+
+func TrailingZeros8(n uint8) int {
+ // amd64:"BSFL","ORL\\t\\$256"
+ // 386:"BSFL"
+ // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
+ // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
+ // s390x:"FLOGR","OR\t\\$256"
+ // wasm:"I64Ctz"
+ return bits.TrailingZeros8(n)
+}
+
+// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
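+// Inside each loop the input is known to be non-zero, so the usual zero-input
+// fixup (a CMOV on amd64, or OR-ing in a sentinel bit for the 8- and 16-bit
+// variants) should not be emitted.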
+
+func IterateBits(n uint) int {
+ i := 0
+ for n != 0 {
+ // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
+ // amd64/v3:"TZCNTQ"
+ i += bits.TrailingZeros(n)
+ n &= n - 1
+ }
+ return i
+}
+
+func IterateBits64(n uint64) int {
+ i := 0
+ for n != 0 {
+ // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ"
+ // amd64/v3:"TZCNTQ"
+ i += bits.TrailingZeros64(n)
+ n &= n - 1
+ }
+ return i
+}
+
+func IterateBits32(n uint32) int {
+ i := 0
+ for n != 0 {
+ // amd64/v1,amd64/v2:"BSFL",-"BTSQ"
+ // amd64/v3:"TZCNTL"
+ i += bits.TrailingZeros32(n)
+ n &= n - 1
+ }
+ return i
+}
+
+func IterateBits16(n uint16) int {
+ i := 0
+ for n != 0 {
+ // amd64/v1,amd64/v2:"BSFL",-"BTSL"
+ // amd64/v3:"TZCNTL"
+ // arm64:"RBITW","CLZW",-"ORR"
+ i += bits.TrailingZeros16(n)
+ n &= n - 1
+ }
+ return i
+}
+
+func IterateBits8(n uint8) int {
+ i := 0
+ for n != 0 {
+ // amd64/v1,amd64/v2:"BSFL",-"BTSL"
+ // amd64/v3:"TZCNTL"
+ // arm64:"RBITW","CLZW",-"ORR"
+ i += bits.TrailingZeros8(n)
+ n &= n - 1
+ }
+ return i
+}
+
+// --------------- //
+// bits.Add* //
+// --------------- //
+
+func Add(x, y, ci uint) (r, co uint) {
+ // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+ // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+ // ppc64x: "ADDC", "ADDE", "ADDZE"
+ // s390x:"ADDE","ADDC\t[$]-1,"
+ // riscv64: "ADD","SLTU"
+ return bits.Add(x, y, ci)
+}
+
+func AddC(x, ci uint) (r, co uint) {
+ // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+ // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+ // loong64: "ADDV", "SGTU"
+ // ppc64x: "ADDC", "ADDE", "ADDZE"
+ // s390x:"ADDE","ADDC\t[$]-1,"
+ // mips64:"ADDV","SGTU"
+ // riscv64: "ADD","SLTU"
+ return bits.Add(x, 7, ci)
+}
+
+func AddZ(x, y uint) (r, co uint) {
+ // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
+ // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+ // loong64: "ADDV", "SGTU"
+ // ppc64x: "ADDC", -"ADDE", "ADDZE"
+ // s390x:"ADDC",-"ADDC\t[$]-1,"
+ // mips64:"ADDV","SGTU"
+ // riscv64: "ADD","SLTU"
+ return bits.Add(x, y, 0)
+}
+
+func AddR(x, y, ci uint) uint {
+ // arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
+ // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+ // loong64: "ADDV", -"SGTU"
+ // ppc64x: "ADDC", "ADDE", -"ADDZE"
+ // s390x:"ADDE","ADDC\t[$]-1,"
+ // mips64:"ADDV",-"SGTU"
+ // riscv64: "ADD",-"SLTU"
+ r, _ := bits.Add(x, y, ci)
+ return r
+}
+
+func AddM(p, q, r *[3]uint) {
+ var c uint
+ r[0], c = bits.Add(p[0], q[0], c)
+ // arm64:"ADCS",-"ADD\t",-"CMP"
+ // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+ // s390x:"ADDE",-"ADDC\t[$]-1,"
+ r[1], c = bits.Add(p[1], q[1], c)
+ r[2], c = bits.Add(p[2], q[2], c)
+}
+
+func Add64(x, y, ci uint64) (r, co uint64) {
+ // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+ // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+ // loong64: "ADDV", "SGTU"
+ // ppc64x: "ADDC", "ADDE", "ADDZE"
+ // s390x:"ADDE","ADDC\t[$]-1,"
+ // mips64:"ADDV","SGTU"
+ // riscv64: "ADD","SLTU"
+ return bits.Add64(x, y, ci)
+}
+
+func Add64C(x, ci uint64) (r, co uint64) {
+ // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
+ // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+ // loong64: "ADDV", "SGTU"
+ // ppc64x: "ADDC", "ADDE", "ADDZE"
+ // s390x:"ADDE","ADDC\t[$]-1,"
+ // mips64:"ADDV","SGTU"
+ // riscv64: "ADD","SLTU"
+ return bits.Add64(x, 7, ci)
+}
+
+func Add64Z(x, y uint64) (r, co uint64) {
+ // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
+ // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+ // loong64: "ADDV", "SGTU"
+ // ppc64x: "ADDC", -"ADDE", "ADDZE"
+ // s390x:"ADDC",-"ADDC\t[$]-1,"
+ // mips64:"ADDV","SGTU"
+ // riscv64: "ADD","SLTU"
+ return bits.Add64(x, y, 0)
+}
+
+func Add64R(x, y, ci uint64) uint64 {
+ // arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
+ // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+ // loong64: "ADDV", -"SGTU"
+ // ppc64x: "ADDC", "ADDE", -"ADDZE"
+ // s390x:"ADDE","ADDC\t[$]-1,"
+ // mips64:"ADDV",-"SGTU"
+ // riscv64: "ADD",-"SLTU"
+ r, _ := bits.Add64(x, y, ci)
+ return r
+}
+func Add64M(p, q, r *[3]uint64) {
+ var c uint64
+ r[0], c = bits.Add64(p[0], q[0], c)
+ // arm64:"ADCS",-"ADD\t",-"CMP"
+ // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
+ // ppc64x: -"ADDC", "ADDE", -"ADDZE"
+ // s390x:"ADDE",-"ADDC\t[$]-1,"
+ r[1], c = bits.Add64(p[1], q[1], c)
+ r[2], c = bits.Add64(p[2], q[2], c)
+}
+
+func Add64MSaveC(p, q, r, c *[2]uint64) {
+ // ppc64x: "ADDC\tR", "ADDZE"
+ r[0], c[0] = bits.Add64(p[0], q[0], 0)
+ // ppc64x: "ADDC\t[$]-1", "ADDE", "ADDZE"
+ r[1], c[1] = bits.Add64(p[1], q[1], c[0])
+}
+
+func Add64PanicOnOverflowEQ(a, b uint64) uint64 {
+ r, c := bits.Add64(a, b, 0)
+ // s390x:"BRC\t[$]3,",-"ADDE"
+ if c == 1 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Add64PanicOnOverflowNE(a, b uint64) uint64 {
+ r, c := bits.Add64(a, b, 0)
+ // s390x:"BRC\t[$]3,",-"ADDE"
+ if c != 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Add64PanicOnOverflowGT(a, b uint64) uint64 {
+ r, c := bits.Add64(a, b, 0)
+ // s390x:"BRC\t[$]3,",-"ADDE"
+ if c > 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
+ var r [2]uint64
+ var c uint64
+ r[0], c = bits.Add64(a[0], b[0], c)
+ r[1], c = bits.Add64(a[1], b[1], c)
+ // s390x:"BRC\t[$]3,"
+ if c == 1 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
+ var r [2]uint64
+ var c uint64
+ r[0], c = bits.Add64(a[0], b[0], c)
+ r[1], c = bits.Add64(a[1], b[1], c)
+ // s390x:"BRC\t[$]3,"
+ if c != 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
+ var r [2]uint64
+ var c uint64
+ r[0], c = bits.Add64(a[0], b[0], c)
+ r[1], c = bits.Add64(a[1], b[1], c)
+ // s390x:"BRC\t[$]3,"
+ if c > 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+// Verify independent carry chain operations are scheduled efficiently
+// and do not cause unnecessary save/restore of the CA bit.
+//
+// This is an example of why CarryChainTail priority must be lower
+// (earlier in the block) than Memory. f[0]=f1 could be scheduled
+// after the first two lower 64 bit limb adds, but before either
+// high 64 bit limbs are added.
+//
+// This is what happened on PPC64 when compiling
+// crypto/internal/edwards25519/field.feMulGeneric.
+func Add64MultipleChains(a, b, c, d [2]uint64) {
+ var cx, d1, d2 uint64
+ a1, a2 := a[0], a[1]
+ b1, b2 := b[0], b[1]
+ c1, c2 := c[0], c[1]
+
+ // ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+ d1, cx = bits.Add64(a1, b1, 0)
+ // ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+ d2, _ = bits.Add64(a2, b2, cx)
+
+ // ppc64x: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER"
+ d1, cx = bits.Add64(c1, d1, 0)
+ // ppc64x: "ADDE", -"ADDC", -"MOVD\t.*, XER"
+ d2, _ = bits.Add64(c2, d2, cx)
+ d[0] = d1
+ d[1] = d2
+}
+
+// --------------- //
+// bits.Sub* //
+// --------------- //
+
+func Sub(x, y, ci uint) (r, co uint) {
+ // amd64:"NEGL","SBBQ","NEGQ"
+ // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // loong64:"SUBV","SGTU"
+ // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+ // s390x:"SUBE"
+ // mips64:"SUBV","SGTU"
+ // riscv64: "SUB","SLTU"
+ return bits.Sub(x, y, ci)
+}
+
+func SubC(x, ci uint) (r, co uint) {
+ // amd64:"NEGL","SBBQ","NEGQ"
+ // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // loong64:"SUBV","SGTU"
+ // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+ // s390x:"SUBE"
+ // mips64:"SUBV","SGTU"
+ // riscv64: "SUB","SLTU"
+ return bits.Sub(x, 7, ci)
+}
+
+func SubZ(x, y uint) (r, co uint) {
+ // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+ // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+ // loong64:"SUBV","SGTU"
+ // ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
+ // s390x:"SUBC"
+ // mips64:"SUBV","SGTU"
+ // riscv64: "SUB","SLTU"
+ return bits.Sub(x, y, 0)
+}
+
+func SubR(x, y, ci uint) uint {
+ // amd64:"NEGL","SBBQ",-"NEGQ"
+ // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+ // loong64:"SUBV",-"SGTU"
+ // ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
+ // s390x:"SUBE"
+ // riscv64: "SUB",-"SLTU"
+ r, _ := bits.Sub(x, y, ci)
+ return r
+}
+func SubM(p, q, r *[3]uint) {
+ var c uint
+ r[0], c = bits.Sub(p[0], q[0], c)
+ // amd64:"SBBQ",-"NEGL",-"NEGQ"
+ // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+ // ppc64x:-"SUBC", "SUBE", -"SUBZE", -"NEG"
+ // s390x:"SUBE"
+ r[1], c = bits.Sub(p[1], q[1], c)
+ r[2], c = bits.Sub(p[2], q[2], c)
+}
+
+func Sub64(x, y, ci uint64) (r, co uint64) {
+ // amd64:"NEGL","SBBQ","NEGQ"
+ // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // loong64:"SUBV","SGTU"
+ // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+ // s390x:"SUBE"
+ // mips64:"SUBV","SGTU"
+ // riscv64: "SUB","SLTU"
+ return bits.Sub64(x, y, ci)
+}
+
+func Sub64C(x, ci uint64) (r, co uint64) {
+ // amd64:"NEGL","SBBQ","NEGQ"
+ // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+ // loong64:"SUBV","SGTU"
+ // ppc64x:"SUBC", "SUBE", "SUBZE", "NEG"
+ // s390x:"SUBE"
+ // mips64:"SUBV","SGTU"
+ // riscv64: "SUB","SLTU"
+ return bits.Sub64(x, 7, ci)
+}
+
+func Sub64Z(x, y uint64) (r, co uint64) {
+ // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+ // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+ // loong64:"SUBV","SGTU"
+ // ppc64x:"SUBC", -"SUBE", "SUBZE", "NEG"
+ // s390x:"SUBC"
+ // mips64:"SUBV","SGTU"
+ // riscv64: "SUB","SLTU"
+ return bits.Sub64(x, y, 0)
+}
+
+func Sub64R(x, y, ci uint64) uint64 {
+ // amd64:"NEGL","SBBQ",-"NEGQ"
+ // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+ // loong64:"SUBV",-"SGTU"
+ // ppc64x:"SUBC", "SUBE", -"SUBZE", -"NEG"
+ // s390x:"SUBE"
+ // riscv64: "SUB",-"SLTU"
+ r, _ := bits.Sub64(x, y, ci)
+ return r
+}
+func Sub64M(p, q, r *[3]uint64) {
+ var c uint64
+ r[0], c = bits.Sub64(p[0], q[0], c)
+ // amd64:"SBBQ",-"NEGL",-"NEGQ"
+ // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
+ // s390x:"SUBE"
+ r[1], c = bits.Sub64(p[1], q[1], c)
+ r[2], c = bits.Sub64(p[2], q[2], c)
+}
+
+func Sub64MSaveC(p, q, r, c *[2]uint64) {
+ // ppc64x:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG"
+ r[0], c[0] = bits.Sub64(p[0], q[0], 0)
+ // ppc64x:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG"
+ r[1], c[1] = bits.Sub64(p[1], q[1], c[0])
+}
+
+func Sub64PanicOnOverflowEQ(a, b uint64) uint64 {
+ r, b := bits.Sub64(a, b, 0)
+ // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+ if b == 1 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Sub64PanicOnOverflowNE(a, b uint64) uint64 {
+ r, b := bits.Sub64(a, b, 0)
+ // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+ if b != 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Sub64PanicOnOverflowGT(a, b uint64) uint64 {
+ r, b := bits.Sub64(a, b, 0)
+ // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE"
+ if b > 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 {
+ var r [2]uint64
+ var c uint64
+ r[0], c = bits.Sub64(a[0], b[0], c)
+ r[1], c = bits.Sub64(a[1], b[1], c)
+ // s390x:"BRC\t[$]12,"
+ if c == 1 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 {
+ var r [2]uint64
+ var c uint64
+ r[0], c = bits.Sub64(a[0], b[0], c)
+ r[1], c = bits.Sub64(a[1], b[1], c)
+ // s390x:"BRC\t[$]12,"
+ if c != 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
+ var r [2]uint64
+ var c uint64
+ r[0], c = bits.Sub64(a[0], b[0], c)
+ r[1], c = bits.Sub64(a[1], b[1], c)
+ // s390x:"BRC\t[$]12,"
+ if c > 0 {
+ panic("overflow")
+ }
+ return r
+}
+
+// --------------- //
+// bits.Mul* //
+// --------------- //
+
+func Mul(x, y uint) (hi, lo uint) {
+ // amd64:"MULQ"
+ // arm64:"UMULH","MUL"
+ // ppc64x:"MULHDU","MULLD"
+ // s390x:"MLGR"
+ // mips64: "MULVU"
+ return bits.Mul(x, y)
+}
+
+func Mul64(x, y uint64) (hi, lo uint64) {
+ // amd64:"MULQ"
+ // arm64:"UMULH","MUL"
+ // ppc64x:"MULHDU","MULLD"
+ // s390x:"MLGR"
+ // mips64: "MULVU"
+ // riscv64:"MULHU","MUL"
+ return bits.Mul64(x, y)
+}
+
+func Mul64HiOnly(x, y uint64) uint64 {
+ // arm64:"UMULH",-"MUL"
+ // riscv64:"MULHU",-"MUL\t"
+ hi, _ := bits.Mul64(x, y)
+ return hi
+}
+
+func Mul64LoOnly(x, y uint64) uint64 {
+ // arm64:"MUL",-"UMULH"
+ // riscv64:"MUL\t",-"MULHU"
+ _, lo := bits.Mul64(x, y)
+ return lo
+}
+
+// --------------- //
+// bits.Div* //
+// --------------- //
+
+func Div(hi, lo, x uint) (q, r uint) {
+ // amd64:"DIVQ"
+ return bits.Div(hi, lo, x)
+}
+
+func Div32(hi, lo, x uint32) (q, r uint32) {
+ // arm64:"ORR","UDIV","MSUB",-"UREM"
+ return bits.Div32(hi, lo, x)
+}
+
+func Div64(hi, lo, x uint64) (q, r uint64) {
+ // amd64:"DIVQ"
+ return bits.Div64(hi, lo, x)
+}
+
+func Div64degenerate(x uint64) (q, r uint64) {
+ // amd64:-"DIVQ"
+ return bits.Div64(0, x, 5)
+}
diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go
new file mode 100644
index 0000000..6d6c33d
--- /dev/null
+++ b/test/codegen/memcombine.go
@@ -0,0 +1,920 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import (
+ "encoding/binary"
+ "runtime"
+)
+
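+// These tests check that byte-by-byte memory accesses, written either through
+// encoding/binary or as explicit shifts and ORs of individual bytes, are
+// combined into single wide memory operations (with a byte swap where the
+// byte order requires one).
+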
+// ------------- //
+// Loading //
+// ------------- //
+
+func load_le64(b []byte) uint64 {
+ // amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR`
+ // s390x:`MOVDBR\s\(.*\),`
+ // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
+ // ppc64le:`MOVD\s`,-`MOV[BHW]Z`
+ // ppc64:`MOVDBR\s`,-`MOV[BHW]Z`
+ return binary.LittleEndian.Uint64(b)
+}
+
+func load_le64_idx(b []byte, idx int) uint64 {
+ // amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR`
+ // s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
+ // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
+ // ppc64:`MOVDBR\s`,-`MOV[BHW]Z\s`
+ return binary.LittleEndian.Uint64(b[idx:])
+}
+
+func load_le32(b []byte) uint32 {
+ // amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
+ // 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR`
+ // s390x:`MOVWBR\s\(.*\),`
+ // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
+ // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
+ // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
+ return binary.LittleEndian.Uint32(b)
+}
+
+func load_le32_idx(b []byte, idx int) uint32 {
+ // amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
+ // 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR`
+ // s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
+ // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s`
+ // ppc64:`MOVWBR\s`,-`MOV[BH]Z\s`
+ return binary.LittleEndian.Uint32(b[idx:])
+}
+
+func load_le16(b []byte) uint16 {
+ // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHZ\s`,-`MOVBZ`
+ // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+ // s390x:`MOVHBR\s\(.*\),`
+ // ppc64:`MOVHBR\s`,-`MOVBZ`
+ return binary.LittleEndian.Uint16(b)
+}
+
+func load_le16_idx(b []byte, idx int) uint16 {
+ // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHZ\s`,-`MOVBZ`
+ // ppc64:`MOVHBR\s`,-`MOVBZ`
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+ // s390x:`MOVHBR\s\(.*\)\(.*\*1\),`
+ return binary.LittleEndian.Uint16(b[idx:])
+}
+
+func load_be64(b []byte) uint64 {
+ // amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+ // amd64/v3:`MOVBEQ`
+ // s390x:`MOVD\s\(.*\),`
+ // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+ // ppc64le:`MOVDBR`,-`MOV[BHW]Z`
+ // ppc64:`MOVD`,-`MOV[BHW]Z`
+ return binary.BigEndian.Uint64(b)
+}
+
+func load_be64_idx(b []byte, idx int) uint64 {
+ // amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+ // amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+ // s390x:`MOVD\s\(.*\)\(.*\*1\),`
+ // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
+ // ppc64le:`MOVDBR`,-`MOV[BHW]Z`
+ // ppc64:`MOVD`,-`MOV[BHW]Z`
+ return binary.BigEndian.Uint64(b[idx:])
+}
+
+func load_be32(b []byte) uint32 {
+ // amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
+ // amd64/v3: `MOVBEL`
+ // s390x:`MOVWZ\s\(.*\),`
+ // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
+ // ppc64le:`MOVWBR`,-`MOV[BH]Z`
+ // ppc64:`MOVWZ`,-`MOV[BH]Z`
+ return binary.BigEndian.Uint32(b)
+}
+
+func load_be32_idx(b []byte, idx int) uint32 {
+ // amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR`
+ // amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+ // s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
+ // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
+ // ppc64le:`MOVWBR`,-`MOV[BH]Z`
+ // ppc64:`MOVWZ`,-`MOV[BH]Z`
+ return binary.BigEndian.Uint32(b[idx:])
+}
+
+func load_be16(b []byte) uint16 {
+ // amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
+ // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
+ // ppc64le:`MOVHBR`,-`MOVBZ`
+ // ppc64:`MOVHZ`,-`MOVBZ`
+ // s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
+ return binary.BigEndian.Uint16(b)
+}
+
+func load_be16_idx(b []byte, idx int) uint16 {
+ // amd64:`ROLW\s\$8`,-`MOVB`,-`OR`
+ // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
+ // ppc64le:`MOVHBR`,-`MOVBZ`
+ // ppc64:`MOVHZ`,-`MOVBZ`
+ // s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW`
+ return binary.BigEndian.Uint16(b[idx:])
+}
+
+func load_le_byte2_uint16(s []byte) uint16 {
+ // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+ // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+ // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
+ // ppc64:`MOVHBR`,-`MOVBZ`
+ return uint16(s[0]) | uint16(s[1])<<8
+}
+
+func load_le_byte2_uint16_inv(s []byte) uint16 {
+ // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB`
+ // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+ // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ`
+ // ppc64:`MOVHBR`,-`MOVBZ`
+ return uint16(s[1])<<8 | uint16(s[0])
+}
+
+func load_le_byte4_uint32(s []byte) uint32 {
+ // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+ // 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ // ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z`
+ // ppc64:`MOVWBR`,-`MOV[BH]Z`
+ return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+}
+
+func load_le_byte4_uint32_inv(s []byte) uint32 {
+ // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+ // ppc64le:`MOVWZ`,-`MOV[BH]Z`
+ // ppc64:`MOVWBR`,-`MOV[BH]Z`
+ return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0])
+}
+
+func load_le_byte8_uint64(s []byte) uint64 {
+ // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+ // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
+ // ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+ // ppc64:`MOVDBR`,-`MOV[WHB]Z`
+ return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
+}
+
+func load_le_byte8_uint64_inv(s []byte) uint64 {
+ // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+ // ppc64le:`MOVD`,-`MOV[WHB]Z`
+ // ppc64:`MOVDBR`,-`MOV[WHB]Z`
+ return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0])
+}
+
+func load_be_byte2_uint16(s []byte) uint16 {
+ // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+ // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
+ // ppc64:`MOVHZ`,-`MOVBZ`
+ return uint16(s[0])<<8 | uint16(s[1])
+}
+
+func load_be_byte2_uint16_inv(s []byte) uint16 {
+ // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+ // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
+ // ppc64:`MOVHZ`,-`MOVBZ`
+ return uint16(s[1]) | uint16(s[0])<<8
+}
+
+func load_be_byte4_uint32(s []byte) uint32 {
+ // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
+ // ppc64le:`MOVWBR`,-`MOV[HB]Z`
+ // ppc64:`MOVWZ`,-`MOV[HB]Z`
+ return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3])
+}
+
+func load_be_byte4_uint32_inv(s []byte) uint32 {
+ // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
+ // amd64/v1,amd64/v2:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
+ // amd64/v3: `MOVBEL`
+ // ppc64le:`MOVWBR`,-`MOV[HB]Z`
+ // ppc64:`MOVWZ`,-`MOV[HB]Z`
+ return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
+}
+
+func load_be_byte8_uint64(s []byte) uint64 {
+ // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
+ // ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+ // ppc64:`MOVD`,-`MOV[WHB]Z`
+ return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7])
+}
+
+func load_be_byte8_uint64_inv(s []byte) uint64 {
+ // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
+ // amd64/v1,amd64/v2:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
+ // amd64/v3: `MOVBEQ`
+ // ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
+ // ppc64:`MOVD`,-`MOV[BHW]Z`
+ return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
+}
+
+func load_le_byte2_uint16_idx(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+ // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
+ // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHZ`,-`MOVBZ`
+ // ppc64:`MOVHBR`,-`MOVBZ`
+ return uint16(s[idx]) | uint16(s[idx+1])<<8
+}
+
+func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB`
+ // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB`
+ // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+ // ppc64le:`MOVHZ`,-`MOVBZ`
+ // ppc64:`MOVHBR`,-`MOVBZ`
+ return uint16(s[idx+1])<<8 | uint16(s[idx])
+}
+
+func load_le_byte4_uint32_idx(s []byte, idx int) uint32 {
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+ // amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR`
+ return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24
+}
+
+func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 {
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]`
+ return uint32(s[idx+3])<<24 | uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx])
+}
+
+func load_le_byte8_uint64_idx(s []byte, idx int) uint64 {
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+ // amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR`
+ return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56
+}
+
+func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 {
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]`
+ return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx])
+}
+
+func load_be_byte2_uint16_idx(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+ // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+ return uint16(s[idx])<<8 | uint16(s[idx+1])
+}
+
+func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
+ // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR`
+ return uint16(s[idx+1]) | uint16(s[idx])<<8
+}
+
+func load_be_byte4_uint32_idx(s []byte, idx int) uint32 {
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
+ return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3])
+}
+
+func load_be_byte8_uint64_idx(s []byte, idx int) uint64 {
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+ return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7])
+}
+
+func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+ return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
+}
+
+func load_le_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
+ return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1])
+}
+
+func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 {
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+ return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
+}
+
+func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 {
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
+ return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2])
+}
+
+func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 {
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+ return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
+}
+
+func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 {
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
+ return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3])
+}
+
+func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
+ return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1])
+}
+
+func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 {
+ // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB`
+ return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8
+}
+
+func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 {
+ // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W`
+ return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3])
+}
+
+func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 {
+ // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W`
+ return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7])
+}
+
+// Some tougher cases for the memcombine pass.
+
+func reassoc_load_uint32(b []byte) uint32 {
+ // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24)
+}
+
+func extrashift_load_uint32(b []byte) uint32 {
+ // amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR`
+ return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26
+}
+
+func outoforder_load_uint32(b []byte) uint32 {
+ // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24
+}
+
+func extraOr_load_uint32(b []byte, x, y uint32) uint32 {
+ // amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]`
+ return x | binary.LittleEndian.Uint32(b) | y
+ // TODO: Note that
+ // x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y
+ // doesn't work because it associates in a way that memcombine can't detect.
+}
+
+// Check load combining across function calls.
+
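+// The recursive calls force the arrays through the calling convention, so
+// each copy should use a single wide move rather than per-element moves.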
+func fcall_byte(a [2]byte) [2]byte {
+ return fcall_byte(fcall_byte(a)) // amd64:`MOVW`
+}
+
+func fcall_uint16(a [2]uint16) [2]uint16 {
+ return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL`
+}
+
+func fcall_uint32(a [2]uint32) [2]uint32 {
+ return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ`
+}
+
+// We want to merge load+op in the first function, but not in the
+// second. See Issue 19595.
+func load_op_merge(p, q *int) {
+ x := *p // amd64:`ADDQ\t\(`
+ *q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724).
+}
+func load_op_no_merge(p, q *int) {
+ x := *p
+ for i := 0; i < 10; i++ {
+ *q += x // amd64:`ADDQ\t[A-Z]`
+ }
+}
+
+// Make sure offsets are folded into loads and stores.
+func offsets_fold(_, a [20]byte) (b [20]byte) {
+ // arm64:`MOVD\tcommand-line-arguments\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, command-line-arguments\.b\+[0-9]+\(FP\)`
+ b = a
+ return
+}
+
+// Make sure we don't put pointers in SSE registers across safe
+// points.
+
+func safe_point(p, q *[2]*int) {
+ a, b := p[0], p[1] // amd64:-`MOVUPS`
+ runtime.GC()
+ q[0], q[1] = a, b // amd64:-`MOVUPS`
+}
+
+// ------------- //
+// Storing //
+// ------------- //
+
+func store_le64(b []byte, x uint64) {
+ // amd64:`MOVQ\s.*\(.*\)$`,-`SHR.`
+ // arm64:`MOVD`,-`MOV[WBH]`
+ // ppc64le:`MOVD\s`,-`MOV[BHW]\s`
+ // ppc64:`MOVDBR`,-`MOVB\s`
+ // s390x:`MOVDBR\s.*\(.*\)$`
+ binary.LittleEndian.PutUint64(b, x)
+}
+
+func store_le64_idx(b []byte, x uint64, idx int) {
+ // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
+ // arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
+ // ppc64le:`MOVD\s`,-`MOV[BHW]\s`
+ // ppc64:`MOVDBR`,-`MOVBZ`
+ // s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$`
+ binary.LittleEndian.PutUint64(b[idx:], x)
+}
+
+func store_le64_idx2(dst []byte, d, length, offset int) []byte {
+ a := dst[d : d+length]
+ b := dst[d-offset:]
+ // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
+ binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b))
+ return dst
+}
+
+func store_le64_idx_const(b []byte, idx int) {
+ // amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$`
+ binary.LittleEndian.PutUint64(b[idx:], 123)
+}
+
+func store_le64_load(b []byte, x *[8]byte) {
+ _ = b[8]
+ // amd64:-`MOV[BWL]`
+ // arm64:-`MOV[BWH]`
+ // ppc64le:`MOVD\s`,-`MOV[BWH]Z`
+ // ppc64:`MOVDBR`
+ // s390x:-`MOVB`,-`MOV[WH]BR`
+ binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:]))
+}
+
+func store_le32(b []byte, x uint32) {
+ // amd64:`MOVL\s`
+ // arm64:`MOVW`,-`MOV[BH]`
+ // ppc64le:`MOVW\s`
+ // ppc64:`MOVWBR`
+ // s390x:`MOVWBR\s.*\(.*\)$`
+ binary.LittleEndian.PutUint32(b, x)
+}
+
+func store_le32_idx(b []byte, x uint32, idx int) {
+ // amd64:`MOVL\s`
+ // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
+ // ppc64le:`MOVW\s`
+ // ppc64:`MOVWBR`
+ // s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$`
+ binary.LittleEndian.PutUint32(b[idx:], x)
+}
+
+func store_le32_idx_const(b []byte, idx int) {
+ // amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$`
+ // ppc64x:`MOVW\s`,-`MOV[HB]`
+ binary.LittleEndian.PutUint32(b[idx:], 123)
+}
+
+func store_le16(b []byte, x uint16) {
+ // amd64:`MOVW\s`
+ // arm64:`MOVH`,-`MOVB`
+ // ppc64le:`MOVH\s`
+ // ppc64:`MOVHBR`
+ // s390x:`MOVHBR\s.*\(.*\)$`
+ binary.LittleEndian.PutUint16(b, x)
+}
+
+func store_le16_idx(b []byte, x uint16, idx int) {
+ // amd64:`MOVW\s`
+ // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+ // ppc64le:`MOVH\s`
+ // ppc64:`MOVHBR\s`
+ // s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$`
+ binary.LittleEndian.PutUint16(b[idx:], x)
+}
+
+func store_le16_idx_const(b []byte, idx int) {
+ // amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$`
+ // ppc64x:`MOVH\s`
+ binary.LittleEndian.PutUint16(b[idx:], 123)
+}
+
+func store_be64(b []byte, x uint64) {
+ // amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
+ // amd64/v3: `MOVBEQ`
+ // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
+ // ppc64le:`MOVDBR`
+ // ppc64:`MOVD\s`
+ // s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+ binary.BigEndian.PutUint64(b, x)
+}
+
+func store_be64_idx(b []byte, x uint64, idx int) {
+ // amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
+ // amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
+ // ppc64le:`MOVDBR`
+ // ppc64:`MOVD\s`
+ // s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+ binary.BigEndian.PutUint64(b[idx:], x)
+}
+
+func store_be32(b []byte, x uint32) {
+ // amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
+ // amd64/v3:`MOVBEL`
+ // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
+ // ppc64le:`MOVWBR`
+ // ppc64:`MOVW\s`
+ // s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+ binary.BigEndian.PutUint32(b, x)
+}
+
+func store_be64_load(b, x *[8]byte) {
+ // arm64:-`REV`
+ // amd64:-`BSWAPQ`
+ binary.BigEndian.PutUint64(b[:], binary.BigEndian.Uint64(x[:]))
+}
+
+func store_be32_load(b, x *[8]byte) {
+ // arm64:-`REVW`
+ // amd64:-`BSWAPL`
+ binary.BigEndian.PutUint32(b[:], binary.BigEndian.Uint32(x[:]))
+}
+
+func store_be32_idx(b []byte, x uint32, idx int) {
+ // amd64/v1,amd64/v2:`BSWAPL`,-`SHR.`
+ // amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
+ // ppc64le:`MOVWBR`
+ // ppc64:`MOVW\s`
+ // s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+ binary.BigEndian.PutUint32(b[idx:], x)
+}
+
+func store_be16(b []byte, x uint16) {
+ // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
+ // amd64/v3:`MOVBEW`,-`ROLW`
+ // arm64:`MOVH`,`REV16W`,-`MOVB`
+ // ppc64le:`MOVHBR`
+ // ppc64:`MOVH\s`
+ // s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s`
+ binary.BigEndian.PutUint16(b, x)
+}
+
+func store_be16_idx(b []byte, x uint16, idx int) {
+ // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.`
+ // amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
+ // ppc64le:`MOVHBR`
+ // ppc64:`MOVH\s`
+ // s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s`
+ binary.BigEndian.PutUint16(b[idx:], x)
+}
+
+func store_le_byte_2(b []byte, val uint16) {
+ _ = b[2]
+ // arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+ // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+ // amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+ // ppc64le:`MOVH\s`,-`MOVB`
+ // ppc64:`MOVHBR`,-`MOVB`
+ b[1], b[2] = byte(val), byte(val>>8)
+}
+
+func store_le_byte_2_inv(b []byte, val uint16) {
+ _ = b[2]
+ // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+ // amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+ // ppc64le:`MOVH\s`,-`MOVB`
+ // ppc64:`MOVHBR`,-`MOVB`
+ b[2], b[1] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_4(b []byte, val uint32) {
+ _ = b[4]
+ // arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`
+ // 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+ // amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+ // ppc64le:`MOVW\s`
+ // ppc64:`MOVWBR\s`
+ b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24)
+}
+
+func store_le_byte_8(b []byte, val uint64) {
+ _ = b[8]
+ // arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`
+ // amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
+ // ppc64le:`MOVD\s`,-`MOVW`
+ // ppc64:`MOVDBR\s`
+ b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56)
+}
+
+func store_be_byte_2(b []byte, val uint16) {
+ _ = b[2]
+ // arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`
+ // amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`
+ // amd64/v3: `MOVBEW`
+ // ppc64le:`MOVHBR`
+ // ppc64:`MOVH\s`
+ b[1], b[2] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4(b []byte, val uint32) {
+ _ = b[4]
+ // arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
+ // amd64/v1,amd64/v2:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`
+ // amd64/v3:`MOVBEL\s[A-Z]+,\s1\([A-Z]+\)`
+ // ppc64le:`MOVWBR`
+ // ppc64:`MOVW\s`
+ b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_8(b []byte, val uint64) {
+ _ = b[8]
+ // arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW`
+ // amd64/v1,amd64/v2:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL`
+ // amd64/v3:`MOVBEQ\s[A-Z]+,\s1\([A-Z]+\)`, -`MOVBEL`
+ // ppc64le:`MOVDBR`
+ // ppc64:`MOVD`
+ b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx(b []byte, idx int, val uint16) {
+ _, _ = b[idx+0], b[idx+1]
+ // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+ // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+ // ppc64le:`MOVH\s`
+ // ppc64:`MOVHBR`
+ b[idx+1], b[idx+0] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) {
+ _, _ = b[idx+0], b[idx+1]
+ // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB`
+ // ppc64le:`MOVH\s`
+ // ppc64:`MOVHBR`
+ b[idx+0], b[idx+1] = byte(val), byte(val>>8)
+}
+
+func store_le_byte_4_idx(b []byte, idx int, val uint32) {
+ _, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
+ // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`
+ // ppc64le:`MOVW\s`
+ // ppc64:`MOVWBR`
+ b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_2_idx(b []byte, idx int, val uint16) {
+ _, _ = b[idx+0], b[idx+1]
+ // arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+ // ppc64le:`MOVHBR`
+ // ppc64:`MOVH\s`
+ b[idx+0], b[idx+1] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4_idx(b []byte, idx int, val uint32) {
+ _, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3]
+ // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W`
+ // ppc64le:`MOVWBR`
+ // ppc64:`MOVW\s`
+ b[idx+0], b[idx+1], b[idx+2], b[idx+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_be_byte_2_idx2(b []byte, idx int, val uint16) {
+ _, _ = b[(idx<<1)+0], b[(idx<<1)+1]
+ // arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+ // ppc64le:`MOVHBR`
+ // ppc64:`MOVH\s`
+ b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val)
+}
+
+func store_le_byte_2_idx2(b []byte, idx int, val uint16) {
+ _, _ = b[(idx<<1)+0], b[(idx<<1)+1]
+ // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+ // ppc64le:`MOVH\s`
+ // ppc64:`MOVHBR`
+ b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val)
+}
+
+func store_be_byte_4_idx4(b []byte, idx int, val uint32) {
+ _, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
+ // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W`
+ // ppc64le:`MOVWBR`
+ // ppc64:`MOVW\s`
+ b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) {
+ _, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3]
+ // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`
+ // ppc64le:`MOVW\s`
+ // ppc64:`MOVWBR`
+ b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val)
+}
+
+// ------------- //
+// Zeroing //
+// ------------- //
+
+// Check that zero stores are combined into larger stores
+
+func zero_byte_2(b1, b2 []byte) {
+ // bounds checks to guarantee safety of writes below
+ _, _ = b1[1], b2[1]
+ // arm64:"MOVH\tZR",-"MOVB"
+ // amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
+ // 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVH\s`
+ b1[0], b1[1] = 0, 0
+ // arm64:"MOVH\tZR",-"MOVB"
+ // 386:`MOVW\s[$]0,\s\([A-Z]+\)`
+ // amd64:`MOVW\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVH`
+ b2[1], b2[0] = 0, 0
+}
+
+func zero_byte_4(b1, b2 []byte) {
+ _, _ = b1[3], b2[3]
+ // arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+ // amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+ // 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVW\s`
+ b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0
+ // arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+ // ppc64x:`MOVW\s`
+ b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0
+}
+
+func zero_byte_8(b []byte) {
+ _ = b[7]
+ b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ b[4], b[5], b[6], b[7] = 0, 0, 0, 0
+}
+
+func zero_byte_16(b []byte) {
+ _ = b[15]
+ b[0], b[1], b[2], b[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+ b[4], b[5], b[6], b[7] = 0, 0, 0, 0
+ b[8], b[9], b[10], b[11] = 0, 0, 0, 0
+ b[12], b[13], b[14], b[15] = 0, 0, 0, 0
+}
+
+func zero_byte_30(a *[30]byte) {
+ *a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_39(a *[39]byte) {
+ *a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW"
+}
+
+func zero_byte_2_idx(b []byte, idx int) {
+ _, _ = b[idx+0], b[idx+1]
+ // arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
+ // ppc64x:`MOVH\s`
+ b[idx+0], b[idx+1] = 0, 0
+}
+
+func zero_byte_2_idx2(b []byte, idx int) {
+ _, _ = b[(idx<<1)+0], b[(idx<<1)+1]
+ // arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
+ // ppc64x:`MOVH\s`
+ b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0
+}
+
+func zero_uint16_2(h1, h2 []uint16) {
+ _, _ = h1[1], h2[1]
+ // arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+ // amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+ // 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVW\s`
+ h1[0], h1[1] = 0, 0
+ // arm64:"MOVW\tZR",-"MOVB",-"MOVH"
+ // amd64:`MOVL\s[$]0,\s\([A-Z]+\)`
+ // 386:`MOVL\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVW`
+ h2[1], h2[0] = 0, 0
+}
+
+func zero_uint16_4(h1, h2 []uint16) {
+ _, _ = h1[3], h2[3]
+ // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ // amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVD\s`
+ h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0
+ // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ // ppc64x:`MOVD\s`
+ h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0
+}
+
+func zero_uint16_8(h []uint16) {
+ _ = h[7]
+ h[0], h[1], h[2], h[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+ h[4], h[5], h[6], h[7] = 0, 0, 0, 0
+}
+
+func zero_uint32_2(w1, w2 []uint32) {
+ _, _ = w1[1], w2[1]
+ // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ // amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVD\s`
+ w1[0], w1[1] = 0, 0
+ // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW"
+ // amd64:`MOVQ\s[$]0,\s\([A-Z]+\)`
+ // ppc64x:`MOVD\s`
+ w2[1], w2[0] = 0, 0
+}
+
+func zero_uint32_4(w1, w2 []uint32) {
+ _, _ = w1[3], w2[3]
+ w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+ w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
+
+func zero_uint64_2(d1, d2 []uint64) {
+ _, _ = d1[1], d2[1]
+ d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+ d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH"
+}
+
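+// Check that adjacent byte loads and stores through pointers are merged
+// into single word-sized operations.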
+func loadstore(p, q *[4]uint8) {
+ // amd64:"MOVL",-"MOVB"
+ // arm64:"MOVWU",-"MOVBU"
+ x0, x1, x2, x3 := q[0], q[1], q[2], q[3]
+ // amd64:"MOVL",-"MOVB"
+ // arm64:"MOVW",-"MOVB"
+ p[0], p[1], p[2], p[3] = x0, x1, x2, x3
+}
+
+type S1 struct {
+ a, b int16
+}
+
+func loadstore2(p, q *S1) {
+ // amd64:"MOVL",-"MOVWLZX"
+ // arm64:"MOVWU",-"MOVH"
+ a, b := p.a, p.b
+ // amd64:"MOVL",-"MOVW"
+ // arm64:"MOVW",-"MOVH"
+ q.a, q.b = a, b
+}
+
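+// Check that two adjacent zero stores of 8-byte elements are merged into
+// one 16-byte store (MOVUPS on amd64, STP on arm64).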
+func wideStore(p *[8]uint64) {
+ if p == nil {
+ return
+ }
+
+ // amd64:"MOVUPS",-"MOVQ"
+ // arm64:"STP",-"MOVD"
+ p[0] = 0
+ // amd64:-"MOVUPS",-"MOVQ"
+ // arm64:-"STP",-"MOVD"
+ p[1] = 0
+}
+
+func wideStore2(p *[8]uint64, x, y uint64) {
+ if p == nil {
+ return
+ }
+
+ // s390x:"STMG"
+ p[0] = x
+ // s390x:-"STMG",-"MOVD"
+ p[1] = y
+}
+
+func store32le(p *struct{ a, b uint32 }, x uint64) {
+ // amd64:"MOVQ",-"MOVL",-"SHRQ"
+ // arm64:"MOVD",-"MOVW",-"LSR"
+ // ppc64le:"MOVD",-"MOVW",-"SRD"
+ p.a = uint32(x)
+ // amd64:-"MOVL",-"SHRQ"
+ // arm64:-"MOVW",-"LSR"
+ // ppc64le:-"MOVW",-"SRD"
+ p.b = uint32(x >> 32)
+}
+func store32be(p *struct{ a, b uint32 }, x uint64) {
+ // ppc64:"MOVD",-"MOVW",-"SRD"
+ // s390x:"MOVD",-"MOVW",-"SRD"
+ p.a = uint32(x >> 32)
+ // ppc64:-"MOVW",-"SRD"
+ // s390x:-"MOVW",-"SRD"
+ p.b = uint32(x)
+}
+func store16le(p *struct{ a, b uint16 }, x uint32) {
+ // amd64:"MOVL",-"MOVW",-"SHRL"
+ // arm64:"MOVW",-"MOVH",-"UBFX"
+ // ppc64le:"MOVW",-"MOVH",-"SRW"
+ p.a = uint16(x)
+ // amd64:-"MOVW",-"SHRL"
+ // arm64:-"MOVH",-"UBFX"
+ // ppc64le:-"MOVH",-"SRW"
+ p.b = uint16(x >> 16)
+}
+func store16be(p *struct{ a, b uint16 }, x uint32) {
+ // ppc64:"MOVW",-"MOVH",-"SRW"
+ // s390x:"MOVW",-"MOVH",-"SRW"
+ p.a = uint16(x >> 16)
+ // ppc64:-"MOVH",-"SRW"
+ // s390x:-"MOVH",-"SRW"
+ p.b = uint16(x)
+}
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
new file mode 100644
index 0000000..e5e89c2
--- /dev/null
+++ b/test/codegen/memops.go
@@ -0,0 +1,403 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var x [2]bool
+var x8 [2]uint8
+var x16 [2]uint16
+var x32 [2]uint32
+var x64 [2]uint64
+
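+// Check that comparisons of globals against constants are performed
+// directly on memory, without loading the value into a register first.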
+func compMem1() int {
+ // amd64:`CMPB\tcommand-line-arguments.x\+1\(SB\), [$]0`
+ if x[1] {
+ return 1
+ }
+ // amd64:`CMPB\tcommand-line-arguments.x8\+1\(SB\), [$]7`
+ if x8[1] == 7 {
+ return 1
+ }
+ // amd64:`CMPW\tcommand-line-arguments.x16\+2\(SB\), [$]7`
+ if x16[1] == 7 {
+ return 1
+ }
+ // amd64:`CMPL\tcommand-line-arguments.x32\+4\(SB\), [$]7`
+ if x32[1] == 7 {
+ return 1
+ }
+ // amd64:`CMPQ\tcommand-line-arguments.x64\+8\(SB\), [$]7`
+ if x64[1] == 7 {
+ return 1
+ }
+ return 0
+}
+
+type T struct {
+ x bool
+ x8 uint8
+ x16 uint16
+ x32 uint32
+ x64 uint64
+ a [2]int // force it to be passed in memory
+}
+
+func compMem2(t T) int {
+ // amd64:`CMPB\t.*\(SP\), [$]0`
+ if t.x {
+ return 1
+ }
+ // amd64:`CMPB\t.*\(SP\), [$]7`
+ if t.x8 == 7 {
+ return 1
+ }
+ // amd64:`CMPW\t.*\(SP\), [$]7`
+ if t.x16 == 7 {
+ return 1
+ }
+ // amd64:`CMPL\t.*\(SP\), [$]7`
+ if t.x32 == 7 {
+ return 1
+ }
+ // amd64:`CMPQ\t.*\(SP\), [$]7`
+ if t.x64 == 7 {
+ return 1
+ }
+ return 0
+}
+
+func compMem3(x, y *int) (int, bool) {
+ // We can do comparisons of a register with memory even if
+ // the register is used subsequently.
+ r := *x
+ // amd64:`CMPQ\t\(`
+ // 386:`CMPL\t\(`
+ return r, r < *y
+}
+
+// The following functions test that indexed load/store operations get generated.
+
+func idxInt8(x, y []int8, i int) {
+ var t int8
+ // amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+ // 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+ t = x[i+1]
+ // amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ // 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ y[i+1] = t
+ // amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ // 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ x[i+1] = 77
+}
+
+func idxInt16(x, y []int16, i int) {
+ var t int16
+ // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+ // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+ t = x[i+1]
+ // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+ // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+ y[i+1] = t
+ // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+ // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+ t = x[16*i+1]
+ // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+ // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+ y[16*i+1] = t
+ // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+ // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+ x[i+1] = 77
+ // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+ // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+ x[16*i+1] = 77
+}
+
+func idxInt32(x, y []int32, i int) {
+ var t int32
+ // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ t = x[i+1]
+ // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ y[i+1] = t
+ // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ t = x[2*i+1]
+ // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ y[2*i+1] = t
+ // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+ // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+ t = x[16*i+1]
+ // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+ // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+ y[16*i+1] = t
+ // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+1] = 77
+ // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+ // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+ x[16*i+1] = 77
+}
+
+func idxInt64(x, y []int64, i int) {
+ var t int64
+ // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ t = x[i+1]
+ // amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ y[i+1] = t
+ // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+ t = x[16*i+1]
+ // amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+ y[16*i+1] = t
+ // amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+1] = 77
+ // amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+ x[16*i+1] = 77
+}
+
+func idxFloat32(x, y []float32, i int) {
+ var t float32
+ // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ // arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+`
+ t = x[i+1]
+ // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ // arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)`
+ y[i+1] = t
+ // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+ // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+ t = x[16*i+1]
+ // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+ // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+ y[16*i+1] = t
+}
+
+func idxFloat64(x, y []float64, i int) {
+ var t float64
+ // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ // arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+`
+ t = x[i+1]
+ // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ // arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)`
+ y[i+1] = t
+ // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+ // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+ t = x[16*i+1]
+ // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+ // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+ y[16*i+1] = t
+}
+
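+// Check that indexed loads feeding arithmetic are folded into
+// memory-operand ALU instructions.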
+func idxLoadPlusOp32(x []int32, i int) int32 {
+ s := x[0]
+ // 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `ADDL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s += x[i+1]
+ // 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `SUBL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s -= x[i+2]
+ // 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ s *= x[i+3]
+ // 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `ANDL\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s &= x[i+4]
+ // 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `ORL\t20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s |= x[i+5]
+ // 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `XORL\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ s ^= x[i+6]
+ return s
+}
+
+func idxLoadPlusOp64(x []int64, i int) int64 {
+ s := x[0]
+ // amd64: `ADDQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s += x[i+1]
+ // amd64: `SUBQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s -= x[i+2]
+ // amd64: `ANDQ\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s &= x[i+3]
+ // amd64: `ORQ\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s |= x[i+4]
+ // amd64: `XORQ\t40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s ^= x[i+5]
+ return s
+}
+
+func idxStorePlusOp32(x []int32, i int, v int32) {
+ // 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `ADDL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+1] += v
+ // 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `SUBL\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+2] -= v
+ // 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `ANDL\t[A-Z]+[0-9]*, 12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+3] &= v
+ // 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `ORL\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+4] |= v
+ // 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `XORL\t[A-Z]+[0-9]*, 20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+5] ^= v
+
+ // 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `ADDL\t[$]77, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+6] += 77
+ // 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `ANDL\t[$]77, 28\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+7] &= 77
+ // 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `ORL\t[$]77, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+8] |= 77
+ // 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)`
+ // amd64: `XORL\t[$]77, 36\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ x[i+9] ^= 77
+}
+
+func idxStorePlusOp64(x []int64, i int, v int64) {
+ // amd64: `ADDQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+1] += v
+ // amd64: `SUBQ\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+2] -= v
+ // amd64: `ANDQ\t[A-Z]+[0-9]*, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+3] &= v
+ // amd64: `ORQ\t[A-Z]+[0-9]*, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+4] |= v
+ // amd64: `XORQ\t[A-Z]+[0-9]*, 40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+5] ^= v
+
+ // amd64: `ADDQ\t[$]77, 48\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+6] += 77
+ // amd64: `ANDQ\t[$]77, 56\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+7] &= 77
+ // amd64: `ORQ\t[$]77, 64\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+8] |= 77
+ // amd64: `XORQ\t[$]77, 72\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ x[i+9] ^= 77
+}
+
+func idxCompare(i int) int {
+ // amd64: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+ if x8[i+1] < x8[0] {
+ return 0
+ }
+ // amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+ if x16[i+1] < x16[0] {
+ return 0
+ }
+ // amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+ if x16[16*i+1] < x16[0] {
+ return 0
+ }
+ // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ if x32[i+1] < x32[0] {
+ return 0
+ }
+ // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+ if x32[16*i+1] < x32[0] {
+ return 0
+ }
+ // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ if x64[i+1] < x64[0] {
+ return 0
+ }
+ // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+ if x64[16*i+1] < x64[0] {
+ return 0
+ }
+ // amd64: `MOVBLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+ if x8[i+2] < 77 {
+ return 0
+ }
+ // amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+ if x16[i+2] < 77 {
+ return 0
+ }
+ // amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+ if x16[16*i+2] < 77 {
+ return 0
+ }
+ // amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+ if x32[i+2] < 77 {
+ return 0
+ }
+ // amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+ if x32[16*i+2] < 77 {
+ return 0
+ }
+ // amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ if x64[i+2] < 77 {
+ return 0
+ }
+ // amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+ if x64[16*i+2] < 77 {
+ return 0
+ }
+ return 1
+}
+
+func idxFloatOps(a []float64, b []float32, i int) (float64, float32) {
+ c := float64(7)
+ // amd64: `ADDSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ c += a[i+1]
+ // amd64: `SUBSD\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ c -= a[i+2]
+ // amd64: `MULSD\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ c *= a[i+3]
+ // amd64: `DIVSD\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ c /= a[i+4]
+
+ d := float32(8)
+ // amd64: `ADDSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ d += b[i+1]
+ // amd64: `SUBSS\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ d -= b[i+2]
+ // amd64: `MULSS\t12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ d *= b[i+3]
+ // amd64: `DIVSS\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ d /= b[i+4]
+ return c, d
+}
+
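+// Check that storing a single-bit condition uses a bit test plus a
+// conditional set directly to memory.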
+func storeTest(a []bool, v int, i int) {
+ // amd64: `BTL\t\$0,`,`SETCS\t4\([A-Z]+[0-9]*\)`
+ a[4] = v&1 != 0
+ // amd64: `BTL\t\$1,`,`SETCS\t3\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+ a[3+i] = v&2 != 0
+}
+
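+// Check that constant bit operations are applied directly to memory,
+// using BTS/BTR/BTC when the bit mask does not fit in a 32-bit immediate.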
+func bitOps(p *[12]uint64) {
+ // amd64: `ORQ\t\$8, \(AX\)`
+ p[0] |= 8
+ // amd64: `ORQ\t\$1073741824, 8\(AX\)`
+ p[1] |= 1 << 30
+ // amd64: `BTSQ\t\$31, 16\(AX\)`
+ p[2] |= 1 << 31
+ // amd64: `BTSQ\t\$63, 24\(AX\)`
+ p[3] |= 1 << 63
+
+ // amd64: `ANDQ\t\$-9, 32\(AX\)`
+ p[4] &^= 8
+ // amd64: `ANDQ\t\$-1073741825, 40\(AX\)`
+ p[5] &^= 1 << 30
+ // amd64: `BTRQ\t\$31, 48\(AX\)`
+ p[6] &^= 1 << 31
+ // amd64: `BTRQ\t\$63, 56\(AX\)`
+ p[7] &^= 1 << 63
+
+ // amd64: `XORQ\t\$8, 64\(AX\)`
+ p[8] ^= 8
+ // amd64: `XORQ\t\$1073741824, 72\(AX\)`
+ p[9] ^= 1 << 30
+ // amd64: `BTCQ\t\$31, 80\(AX\)`
+ p[10] ^= 1 << 31
+ // amd64: `BTCQ\t\$63, 88\(AX\)`
+ p[11] ^= 1 << 63
+}
diff --git a/test/codegen/memops_bigoffset.go b/test/codegen/memops_bigoffset.go
new file mode 100644
index 0000000..d34531d
--- /dev/null
+++ b/test/codegen/memops_bigoffset.go
@@ -0,0 +1,71 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+type big1 struct {
+ w [1<<30 - 1]uint32
+}
+type big2 struct {
+ d [1<<29 - 1]uint64
+}
+
+func loadLargeOffset(sw *big1, sd *big2) (uint32, uint64) {
+
+ // ppc64x:`MOVWZ\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+ a3 := sw.w[1<<10]
+ // ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
+ // ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+ // ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+ b3 := sw.w[1<<16]
+ // ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
+ // ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+ // ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
+ c3 := sw.w[1<<28]
+ // ppc64x:`MOVWZ\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
+ d3 := sw.w[1<<29]
+ // ppc64x:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+ a4 := sd.d[1<<10]
+ // ppc64le/power10:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+ // ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+ // ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+ b4 := sd.d[1<<16]
+ // ppc64le/power10:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
+ // ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+ // ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
+ c4 := sd.d[1<<27]
+ // ppc64x:`MOVD\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
+ d4 := sd.d[1<<28]
+
+ return a3 + b3 + c3 + d3, a4 + b4 + c4 + d4
+}
+
+func storeLargeOffset(sw *big1, sd *big2) {
+ // ppc64x:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+ sw.w[1<<10] = uint32(10)
+ // ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+ // ppc64x/power9:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
+ // ppc64x/power8:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
+ sw.w[1<<16] = uint32(20)
+ // ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+ // ppc64x/power9:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+ // ppc64x/power8:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+ sw.w[1<<28] = uint32(30)
+ // ppc64x:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`
+ sw.w[1<<29] = uint32(40)
+ // ppc64x:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+ sd.d[1<<10] = uint64(40)
+ // ppc64le/power10:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+ // ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+ // ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+ sd.d[1<<16] = uint64(50)
+ // ppc64le/power10:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
+ // ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+ // ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
+ sd.d[1<<27] = uint64(60)
+ // ppc64x:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`
+ sd.d[1<<28] = uint64(70)
+}
diff --git a/test/codegen/noextend.go b/test/codegen/noextend.go
new file mode 100644
index 0000000..193f75b
--- /dev/null
+++ b/test/codegen/noextend.go
@@ -0,0 +1,285 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+var sval64 [8]int64
+var sval32 [8]int32
+var sval16 [8]int16
+var sval8 [8]int8
+var val64 [8]uint64
+var val32 [8]uint32
+var val16 [8]uint16
+var val8 [8]uint8
+
+// Avoid zero/sign extensions following a load
+// that has already extended the value correctly.
+// Note: no tests are done for int8, since an extra
+// extension is usually needed because there is no
+// signed byte load.
+
+func set16(x8 int8, u8 *uint8, y8 int8, z8 uint8) {
+ // Truncate not needed, load does sign/zero extend
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ val16[0] = uint16(*u8)
+
+ // AND not needed due to size
+ // ppc64x:-"ANDCC"
+ sval16[1] = 255 & int16(x8+y8)
+
+ // ppc64x:-"ANDCC"
+ val16[1] = 255 & uint16(*u8+z8)
+
+}
+func shiftidx(u8 *uint8, x16 *int16, u16 *uint16) {
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ val16[0] = uint16(sval16[*u8>>2])
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ sval16[1] = int16(val16[*x16>>1])
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ val16[1] = uint16(sval16[*u16>>2])
+
+}
+
+func setnox(x8 int8, u8 *uint8, y8 *int8, z8 *uint8, x16 *int16, u16 *uint16, x32 *int32, u32 *uint32) {
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ val16[0] = uint16(*u8)
+
+ // AND not needed due to size
+ // ppc64x:-"ANDCC"
+ sval16[1] = 255 & int16(x8+*y8)
+
+ // ppc64x:-"ANDCC"
+ val16[1] = 255 & uint16(*u8+*z8)
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ sval32[1] = int32(*x16)
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ val32[0] = uint32(*u8)
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ val32[1] = uint32(*u16)
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ sval64[1] = int64(*x16)
+
+ // ppc64x:-"MOVW\tR\\d+,\\sR\\d+"
+ sval64[2] = int64(*x32)
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ val64[0] = uint64(*u8)
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ val64[1] = uint64(*u16)
+
+ // ppc64x:-"MOVWZ\tR\\d+,\\sR\\d+"
+ val64[2] = uint64(*u32)
+}
+
+func cmp16(u8 *uint8, x32 *int32, u32 *uint32, x64 *int64, u64 *uint64) bool {
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ if uint16(*u8) == val16[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ if uint16(*u32>>16) == val16[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ if uint16(*u64>>48) == val16[0] {
+ return true
+ }
+
+ // Verify the truncates are using the correct sign.
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ if int16(*x32) == sval16[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ if uint16(*u32) == val16[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ if int16(*x64) == sval16[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ if uint16(*u64) == val16[0] {
+ return true
+ }
+
+ return false
+}
+
+func cmp32(u8 *uint8, x16 *int16, u16 *uint16, x64 *int64, u64 *uint64) bool {
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ if uint32(*u8) == val32[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ if int32(*x16) == sval32[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ if uint32(*u16) == val32[0] {
+ return true
+ }
+
+ // Verify the truncates are using the correct sign.
+ // ppc64x:-"MOVWZ\tR\\d+,\\sR\\d+"
+ if int32(*x64) == sval32[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVW\tR\\d+,\\sR\\d+"
+ if uint32(*u64) == val32[0] {
+ return true
+ }
+
+ return false
+}
+
+func cmp64(u8 *uint8, x16 *int16, u16 *uint16, x32 *int32, u32 *uint32) bool {
+
+ // ppc64x:-"MOVBZ\tR\\d+,\\sR\\d+"
+ if uint64(*u8) == val64[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVH\tR\\d+,\\sR\\d+"
+ if int64(*x16) == sval64[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVHZ\tR\\d+,\\sR\\d+"
+ if uint64(*u16) == val64[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVW\tR\\d+,\\sR\\d+"
+ if int64(*x32) == sval64[0] {
+ return true
+ }
+
+ // ppc64x:-"MOVWZ\tR\\d+,\\sR\\d+"
+ if uint64(*u32) == val64[0] {
+ return true
+ }
+ return false
+}
+
+// no unsigned extension following 32-bit ops
+
+func noUnsignEXT(t1, t2, t3, t4 uint32, k int64) uint64 {
+ var ret uint64
+
+ // arm64:"RORW",-"MOVWU"
+ ret += uint64(bits.RotateLeft32(t1, 7))
+
+ // arm64:"MULW",-"MOVWU"
+ ret *= uint64(t1 * t2)
+
+ // arm64:"MNEGW",-"MOVWU"
+ ret += uint64(-t1 * t3)
+
+ // arm64:"UDIVW",-"MOVWU"
+ ret += uint64(t1 / t4)
+
+ // arm64:-"MOVWU"
+ ret += uint64(t2 % t3)
+
+ // arm64:"MSUBW",-"MOVWU"
+ ret += uint64(t1 - t2*t3)
+
+ // arm64:"MADDW",-"MOVWU"
+ ret += uint64(t3*t4 + t2)
+
+ // arm64:"REVW",-"MOVWU"
+ ret += uint64(bits.ReverseBytes32(t1))
+
+ // arm64:"RBITW",-"MOVWU"
+ ret += uint64(bits.Reverse32(t1))
+
+ // arm64:"CLZW",-"MOVWU"
+ ret += uint64(bits.LeadingZeros32(t1))
+
+ // arm64:"REV16W",-"MOVWU"
+ ret += uint64(((t1 & 0xff00ff00) >> 8) | ((t1 & 0x00ff00ff) << 8))
+
+ // arm64:"EXTRW",-"MOVWU"
+ ret += uint64((t1 << 25) | (t2 >> 7))
+
+ return ret
+}
+
+// no sign extension when the upper bits of the result are zero
+
+func noSignEXT(x int) int64 {
+ t1 := int32(x)
+
+ var ret int64
+
+ // arm64:-"MOVW"
+ ret += int64(t1 & 1)
+
+ // arm64:-"MOVW"
+ ret += int64(int32(x & 0x7fffffff))
+
+ // arm64:-"MOVH"
+ ret += int64(int16(x & 0x7fff))
+
+ // arm64:-"MOVB"
+ ret += int64(int8(x & 0x7f))
+
+ return ret
+}
+
+// corner cases where sign extension must not be omitted
+
+func shouldSignEXT(x int) int64 {
+ t1 := int32(x)
+
+ var ret int64
+
+ // arm64:"MOVW"
+ ret += int64(t1 & (-1))
+
+ // arm64:"MOVW"
+ ret += int64(int32(x & 0x80000000))
+
+ // arm64:"MOVW"
+ ret += int64(int32(x & 0x1100000011111111))
+
+ // arm64:"MOVH"
+ ret += int64(int16(x & 0x1100000000001111))
+
+ // arm64:"MOVB"
+ ret += int64(int8(x & 0x1100000000000011))
+
+ return ret
+}
+
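+// Check that no intermediate zero-extension is emitted within a chain of
+// 32-bit operations.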
+func noIntermediateExtension(a, b, c uint32) uint32 {
+ // arm64:-"MOVWU"
+ return a*b*9 + c
+}
diff --git a/test/codegen/race.go b/test/codegen/race.go
new file mode 100644
index 0000000..b977823
--- /dev/null
+++ b/test/codegen/race.go
@@ -0,0 +1,22 @@
+// asmcheck -race
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// Check that we elide racefuncenter/racefuncexit for
+// functions with no calls (but which might panic
+// in various ways). See issue 31219.
+// amd64:-"CALL.*racefuncenter.*"
+// arm64:-"CALL.*racefuncenter.*"
+// ppc64le:-"CALL.*racefuncenter.*"
+func RaceMightPanic(a []int, i, j, k, s int) {
+ var b [4]int
+ _ = b[i] // panicIndex
+ _ = a[i:j] // panicSlice
+ _ = a[i:j:k] // also panicSlice
+ _ = i << s // panicShift
+ _ = i / j // panicDivide
+}
diff --git a/test/codegen/regabi_regalloc.go b/test/codegen/regabi_regalloc.go
new file mode 100644
index 0000000..a7b7bd5
--- /dev/null
+++ b/test/codegen/regabi_regalloc.go
@@ -0,0 +1,23 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
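+// Check that call arguments are moved directly into their ABI argument
+// registers, without unnecessary extra copies.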
+//go:registerparams
+func f1(a, b int) {
+ // amd64:"MOVQ\tBX, CX", "MOVQ\tAX, BX", "MOVL\t\\$1, AX", -"MOVQ\t.*DX"
+ g(1, a, b)
+}
+
+//go:registerparams
+func f2(a, b int) {
+ // amd64:"MOVQ\tBX, AX", "MOVQ\t[AB]X, CX", -"MOVQ\t.*, BX"
+ g(b, b, b)
+}
+
+//go:noinline
+//go:registerparams
+func g(int, int, int) {}
diff --git a/test/codegen/retpoline.go b/test/codegen/retpoline.go
new file mode 100644
index 0000000..0e8f661
--- /dev/null
+++ b/test/codegen/retpoline.go
@@ -0,0 +1,43 @@
+// asmcheck -gcflags=-spectre=ret
+
+//go:build amd64
+
+package codegen
+
+func CallFunc(f func()) {
+ // amd64:`CALL\truntime.retpoline`
+ f()
+}
+
+func CallInterface(x interface{ M() }) {
+ // amd64:`CALL\truntime.retpoline`
+ x.M()
+}
+
+// Check to make sure that jump tables are disabled
+// when retpoline is on. See issue 57097.
+func noJumpTables(x int) int {
+ switch x {
+ case 0:
+ return 0
+ case 1:
+ return 1
+ case 2:
+ return 2
+ case 3:
+ return 3
+ case 4:
+ return 4
+ case 5:
+ return 5
+ case 6:
+ return 6
+ case 7:
+ return 7
+ case 8:
+ return 8
+ case 9:
+ return 9
+ }
+ return 10
+}
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go
new file mode 100644
index 0000000..5495f86
--- /dev/null
+++ b/test/codegen/rotate.go
@@ -0,0 +1,259 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "math/bits"
+
+// ------------------- //
+// const rotates //
+// ------------------- //
+
+func rot64(x uint64) uint64 {
+ var a uint64
+
+ // amd64:"ROLQ\t[$]7"
+ // ppc64x:"ROTL\t[$]7"
+ // loong64: "ROTRV\t[$]57"
+ a += x<<7 | x>>57
+
+ // amd64:"ROLQ\t[$]8"
+ // arm64:"ROR\t[$]56"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
+ // ppc64x:"ROTL\t[$]8"
+ // loong64: "ROTRV\t[$]56"
+ a += x<<8 + x>>56
+
+ // amd64:"ROLQ\t[$]9"
+ // arm64:"ROR\t[$]55"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
+ // ppc64x:"ROTL\t[$]9"
+ // loong64: "ROTRV\t[$]55"
+ a += x<<9 ^ x>>55
+
+ // amd64:"ROLQ\t[$]10"
+ // arm64:"ROR\t[$]54"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
+ // ppc64x:"ROTL\t[$]10"
+ // arm64:"ROR\t[$]54"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
+ // loong64: "ROTRV\t[$]54"
+ a += bits.RotateLeft64(x, 10)
+
+ return a
+}
+
+func rot32(x uint32) uint32 {
+ var a uint32
+
+ // amd64:"ROLL\t[$]7"
+ // arm:"MOVW\tR\\d+@>25"
+ // ppc64x:"ROTLW\t[$]7"
+ // loong64: "ROTR\t[$]25"
+ a += x<<7 | x>>25
+
+ // amd64:`ROLL\t[$]8`
+ // arm:"MOVW\tR\\d+@>24"
+ // arm64:"RORW\t[$]24"
+ // s390x:"RLL\t[$]8"
+ // ppc64x:"ROTLW\t[$]8"
+ // loong64: "ROTR\t[$]24"
+ a += x<<8 + x>>24
+
+ // amd64:"ROLL\t[$]9"
+ // arm:"MOVW\tR\\d+@>23"
+ // arm64:"RORW\t[$]23"
+ // s390x:"RLL\t[$]9"
+ // ppc64x:"ROTLW\t[$]9"
+ // loong64: "ROTR\t[$]23"
+ a += x<<9 ^ x>>23
+
+ // amd64:"ROLL\t[$]10"
+ // arm:"MOVW\tR\\d+@>22"
+ // arm64:"RORW\t[$]22"
+ // s390x:"RLL\t[$]10"
+ // ppc64x:"ROTLW\t[$]10"
+ // arm64:"RORW\t[$]22"
+ // s390x:"RLL\t[$]10"
+ // loong64: "ROTR\t[$]22"
+ a += bits.RotateLeft32(x, 10)
+
+ return a
+}
+
+func rot16(x uint16) uint16 {
+ var a uint16
+
+ // amd64:"ROLW\t[$]7"
+ a += x<<7 | x>>9
+
+ // amd64:`ROLW\t[$]8`
+ a += x<<8 + x>>8
+
+ // amd64:"ROLW\t[$]9"
+ a += x<<9 ^ x>>7
+
+ return a
+}
+
+func rot8(x uint8) uint8 {
+ var a uint8
+
+ // amd64:"ROLB\t[$]5"
+ a += x<<5 | x>>3
+
+ // amd64:`ROLB\t[$]6`
+ a += x<<6 + x>>2
+
+ // amd64:"ROLB\t[$]7"
+ a += x<<7 ^ x>>1
+
+ return a
+}
+
+// ----------------------- //
+// non-const rotates //
+// ----------------------- //
+
+func rot64nc(x uint64, z uint) uint64 {
+ var a uint64
+
+ z &= 63
+
+ // amd64:"ROLQ",-"AND"
+ // arm64:"ROR","NEG",-"AND"
+ // ppc64x:"ROTL",-"NEG",-"AND"
+ // loong64: "ROTRV", -"AND"
+ a += x<<z | x>>(64-z)
+
+ // amd64:"RORQ",-"AND"
+ // arm64:"ROR",-"NEG",-"AND"
+ // ppc64x:"ROTL","NEG",-"AND"
+ // loong64: "ROTRV", -"AND"
+ a += x>>z | x<<(64-z)
+
+ return a
+}
+
+func rot32nc(x uint32, z uint) uint32 {
+ var a uint32
+
+ z &= 31
+
+ // amd64:"ROLL",-"AND"
+ // arm64:"ROR","NEG",-"AND"
+ // ppc64x:"ROTLW",-"NEG",-"AND"
+ // loong64: "ROTR", -"AND"
+ a += x<<z | x>>(32-z)
+
+ // amd64:"RORL",-"AND"
+ // arm64:"ROR",-"NEG",-"AND"
+ // ppc64x:"ROTLW","NEG",-"AND"
+ // loong64: "ROTR", -"AND"
+ a += x>>z | x<<(32-z)
+
+ return a
+}
+
+func rot16nc(x uint16, z uint) uint16 {
+ var a uint16
+
+ z &= 15
+
+ // amd64:"ROLW",-"ANDQ"
+ a += x<<z | x>>(16-z)
+
+ // amd64:"RORW",-"ANDQ"
+ a += x>>z | x<<(16-z)
+
+ return a
+}
+
+func rot8nc(x uint8, z uint) uint8 {
+ var a uint8
+
+ z &= 7
+
+ // amd64:"ROLB",-"ANDQ"
+ a += x<<z | x>>(8-z)
+
+ // amd64:"RORB",-"ANDQ"
+ a += x>>z | x<<(8-z)
+
+ return a
+}
+
+// Issue 18254: rotate after inlining
+func f32(x uint32) uint32 {
+ // amd64:"ROLL\t[$]7"
+ return rot32nc(x, 7)
+}
+
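+// Check that two consecutive constant rotates are combined into a single rotate.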
+func doubleRotate(x uint64) uint64 {
+ x = (x << 5) | (x >> 59)
+ // amd64:"ROLQ\t[$]15"
+ // arm64:"ROR\t[$]49"
+ x = (x << 10) | (x >> 54)
+ return x
+}
+
+// --------------------------------------- //
+// Combined Rotate + Masking operations //
+// --------------------------------------- //
+
+func checkMaskedRotate32(a []uint32, r int) {
+ i := 0
+
+ // ppc64x: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
+ i++
+ // ppc64x: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
+ i++
+ // ppc64x: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
+ i++
+ // ppc64x: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
+ i++
+
+ // ppc64x: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
+ i++
+ // ppc64x: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], r) & 0xFF00
+ i++
+
+ // ppc64x: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], r) & 0xFFF00FFF
+ i++
+ // ppc64x: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], 4) & 0xFFF00FFF
+ i++
+}
+
+// combined arithmetic and rotate on arm64
+func checkArithmeticWithRotate(a *[1000]uint64) {
+ // arm64: "AND\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+ a[2] = a[1] & bits.RotateLeft64(a[0], 13)
+ // arm64: "ORR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+ a[5] = a[4] | bits.RotateLeft64(a[3], 13)
+ // arm64: "EOR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+ a[8] = a[7] ^ bits.RotateLeft64(a[6], 13)
+ // arm64: "MVN\tR[0-9]+@>51, R[0-9]+"
+ a[10] = ^bits.RotateLeft64(a[9], 13)
+ // arm64: "BIC\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+ a[13] = a[12] &^ bits.RotateLeft64(a[11], 13)
+ // arm64: "EON\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+ a[16] = a[15] ^ ^bits.RotateLeft64(a[14], 13)
+ // arm64: "ORN\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
+ a[19] = a[18] | ^bits.RotateLeft64(a[17], 13)
+ // arm64: "TST\tR[0-9]+@>51, R[0-9]+"
+ if a[18]&bits.RotateLeft64(a[19], 13) == 0 {
+ a[20] = 1
+ }
+
+}
diff --git a/test/codegen/select.go b/test/codegen/select.go
new file mode 100644
index 0000000..82f6d1c
--- /dev/null
+++ b/test/codegen/select.go
@@ -0,0 +1,20 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func f() {
+ ch1 := make(chan int)
+ ch2 := make(chan int)
+ for {
+ // amd64:-`MOVQ\t[$]0, command-line-arguments..autotmp_3`
+ select {
+ case <-ch1:
+ case <-ch2:
+ default:
+ }
+ }
+}
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
new file mode 100644
index 0000000..50d6042
--- /dev/null
+++ b/test/codegen/shift.go
@@ -0,0 +1,476 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// ------------------ //
+// constant shifts //
+// ------------------ //
+
+func lshConst64x64(v int64) int64 {
+ // ppc64x:"SLD"
+ // riscv64:"SLLI",-"AND",-"SLTIU"
+ return v << uint64(33)
+}
+
+func rshConst64Ux64(v uint64) uint64 {
+ // ppc64x:"SRD"
+ // riscv64:"SRLI\t",-"AND",-"SLTIU"
+ return v >> uint64(33)
+}
+
+func rshConst64Ux64Overflow32(v uint32) uint64 {
+ // riscv64:"MOV\t\\$0,",-"SRL"
+ return uint64(v) >> 32
+}
+
+func rshConst64Ux64Overflow16(v uint16) uint64 {
+ // riscv64:"MOV\t\\$0,",-"SRL"
+ return uint64(v) >> 16
+}
+
+func rshConst64Ux64Overflow8(v uint8) uint64 {
+ // riscv64:"MOV\t\\$0,",-"SRL"
+ return uint64(v) >> 8
+}
+
+func rshConst64x64(v int64) int64 {
+ // ppc64x:"SRAD"
+ // riscv64:"SRAI\t",-"OR",-"SLTIU"
+ return v >> uint64(33)
+}
+
+func rshConst64x64Overflow32(v int32) int64 {
+ // riscv64:"SRAIW",-"SLLI",-"SRAI\t"
+ return int64(v) >> 32
+}
+
+func rshConst64x64Overflow16(v int16) int64 {
+ // riscv64:"SLLI","SRAI",-"SRAIW"
+ return int64(v) >> 16
+}
+
+func rshConst64x64Overflow8(v int8) int64 {
+ // riscv64:"SLLI","SRAI",-"SRAIW"
+ return int64(v) >> 8
+}
+
+func lshConst32x64(v int32) int32 {
+ // ppc64x:"SLW"
+ // riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
+ return v << uint64(29)
+}
+
+func rshConst32Ux64(v uint32) uint32 {
+ // ppc64x:"SRW"
+ // riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW"
+ return v >> uint64(29)
+}
+
+func rshConst32x64(v int32) int32 {
+ // ppc64x:"SRAW"
+ // riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW"
+ return v >> uint64(29)
+}
+
+func lshConst64x32(v int64) int64 {
+ // ppc64x:"SLD"
+ // riscv64:"SLLI",-"AND",-"SLTIU"
+ return v << uint32(33)
+}
+
+func rshConst64Ux32(v uint64) uint64 {
+ // ppc64x:"SRD"
+ // riscv64:"SRLI\t",-"AND",-"SLTIU"
+ return v >> uint32(33)
+}
+
+func rshConst64x32(v int64) int64 {
+ // ppc64x:"SRAD"
+ // riscv64:"SRAI\t",-"OR",-"SLTIU"
+ return v >> uint32(33)
+}
+
+// ------------------ //
+// masked shifts //
+// ------------------ //
+
+func lshMask64x64(v int64, s uint64) int64 {
+ // arm64:"LSL",-"AND"
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SLL",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v << (s & 63)
+}
+
+func rshMask64Ux64(v uint64, s uint64) uint64 {
+ // arm64:"LSR",-"AND",-"CSEL"
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SRL\t",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> (s & 63)
+}
+
+func rshMask64x64(v int64, s uint64) int64 {
+ // arm64:"ASR",-"AND",-"CSEL"
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SRA\t",-"OR",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> (s & 63)
+}
+
+func lshMask32x64(v int32, s uint64) int32 {
+ // arm64:"LSL",-"AND"
+ // ppc64x:"ISEL",-"ORN"
+ // riscv64:"SLL",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v << (s & 63)
+}
+
+func rshMask32Ux64(v uint32, s uint64) uint32 {
+ // arm64:"LSR",-"AND"
+ // ppc64x:"ISEL",-"ORN"
+ // riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> (s & 63)
+}
+
+func rsh5Mask32Ux64(v uint32, s uint64) uint32 {
+ // riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t"
+ return v >> (s & 31)
+}
+
+func rshMask32x64(v int32, s uint64) int32 {
+ // arm64:"ASR",-"AND"
+ // ppc64x:"ISEL",-"ORN"
+ // riscv64:"SRAW","OR","SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> (s & 63)
+}
+
+func rsh5Mask32x64(v int32, s uint64) int32 {
+ // riscv64:"SRAW",-"OR",-"SLTIU"
+ return v >> (s & 31)
+}
+
+func lshMask64x32(v int64, s uint32) int64 {
+ // arm64:"LSL",-"AND"
+ // ppc64x:"RLDICL",-"ORN"
+ // riscv64:"SLL",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v << (s & 63)
+}
+
+func rshMask64Ux32(v uint64, s uint32) uint64 {
+ // arm64:"LSR",-"AND",-"CSEL"
+ // ppc64x:"RLDICL",-"ORN"
+ // riscv64:"SRL\t",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> (s & 63)
+}
+
+func rshMask64x32(v int64, s uint32) int64 {
+ // arm64:"ASR",-"AND",-"CSEL"
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SRA\t",-"OR",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> (s & 63)
+}
+
+func lshMask64x32Ext(v int64, s int32) int64 {
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SLL",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v << uint(s&63)
+}
+
+func rshMask64Ux32Ext(v uint64, s int32) uint64 {
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SRL\t",-"AND\t",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> uint(s&63)
+}
+
+func rshMask64x32Ext(v int64, s int32) int64 {
+ // ppc64x:"RLDICL",-"ORN",-"ISEL"
+ // riscv64:"SRA\t",-"OR",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ return v >> uint(s&63)
+}
+
+// --------------- //
+// signed shifts //
+// --------------- //
+
+// We do want to generate a test + panicshift for these cases.
+func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
+ // amd64:"TESTB"
+ _ = x << v8
+ // amd64:"TESTW"
+ _ = x << v16
+ // amd64:"TESTL"
+ _ = x << v32
+ // amd64:"TESTQ"
+ _ = x << v64
+}
+
+// We want to avoid generating a test + panicshift for these cases.
+func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
+ // amd64:-"TESTB"
+ _ = x << (v8 & 7)
+ // amd64:-"TESTW"
+ _ = x << (v16 & 15)
+ // amd64:-"TESTL"
+ _ = x << (v32 & 31)
+ // amd64:-"TESTQ"
+ _ = x << (v64 & 63)
+}
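
The TEST checks above pin down when the compiler must guard the shift count at run time: Go defines shifts by counts at or above the operand width (the bits are simply shifted out), but a negative signed count panics, and masking the count proves it non-negative so the check can be dropped. A minimal standalone sketch of that behaviour (not part of the test files, standard library only):

    package main

    import "fmt"

    func main() {
        x, n := 1, int8(-1)
        defer func() { fmt.Println("recovered:", recover()) }()
        fmt.Println(x << (n & 7)) // masked count (7 here) can never be negative: no check needed
        fmt.Println(x << n)       // panics: negative shift amount
    }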
+
+// ------------------ //
+// bounded shifts //
+// ------------------ //
+
+func lshGuarded64(v int64, s uint) int64 {
+ if s < 64 {
+ // riscv64:"SLL",-"AND",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // wasm:-"Select",-".*LtU"
+ // arm64:"LSL",-"CSEL"
+ return v << s
+ }
+ panic("shift too large")
+}
+
+func rshGuarded64U(v uint64, s uint) uint64 {
+ if s < 64 {
+ // riscv64:"SRL\t",-"AND",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // wasm:-"Select",-".*LtU"
+ // arm64:"LSR",-"CSEL"
+ return v >> s
+ }
+ panic("shift too large")
+}
+
+func rshGuarded64(v int64, s uint) int64 {
+ if s < 64 {
+ // riscv64:"SRA\t",-"OR",-"SLTIU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // wasm:-"Select",-".*LtU"
+ // arm64:"ASR",-"CSEL"
+ return v >> s
+ }
+ panic("shift too large")
+}
+
+func provedUnsignedShiftLeft(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
+ if shift >= 0 && shift < 64 {
+ // arm64:"LSL",-"CSEL"
+ r1 = val64 << shift
+ }
+ if shift >= 0 && shift < 32 {
+ // arm64:"LSL",-"CSEL"
+ r2 = val32 << shift
+ }
+ if shift >= 0 && shift < 16 {
+ // arm64:"LSL",-"CSEL"
+ r3 = val16 << shift
+ }
+ if shift >= 0 && shift < 8 {
+ // arm64:"LSL",-"CSEL"
+ r4 = val8 << shift
+ }
+ return r1, r2, r3, r4
+}
+
+func provedSignedShiftLeft(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
+ if shift >= 0 && shift < 64 {
+ // arm64:"LSL",-"CSEL"
+ r1 = val64 << shift
+ }
+ if shift >= 0 && shift < 32 {
+ // arm64:"LSL",-"CSEL"
+ r2 = val32 << shift
+ }
+ if shift >= 0 && shift < 16 {
+ // arm64:"LSL",-"CSEL"
+ r3 = val16 << shift
+ }
+ if shift >= 0 && shift < 8 {
+ // arm64:"LSL",-"CSEL"
+ r4 = val8 << shift
+ }
+ return r1, r2, r3, r4
+}
+
+func provedUnsignedShiftRight(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
+ if shift >= 0 && shift < 64 {
+ // arm64:"LSR",-"CSEL"
+ r1 = val64 >> shift
+ }
+ if shift >= 0 && shift < 32 {
+ // arm64:"LSR",-"CSEL"
+ r2 = val32 >> shift
+ }
+ if shift >= 0 && shift < 16 {
+ // arm64:"LSR",-"CSEL"
+ r3 = val16 >> shift
+ }
+ if shift >= 0 && shift < 8 {
+ // arm64:"LSR",-"CSEL"
+ r4 = val8 >> shift
+ }
+ return r1, r2, r3, r4
+}
+
+func provedSignedShiftRight(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
+ if shift >= 0 && shift < 64 {
+ // arm64:"ASR",-"CSEL"
+ r1 = val64 >> shift
+ }
+ if shift >= 0 && shift < 32 {
+ // arm64:"ASR",-"CSEL"
+ r2 = val32 >> shift
+ }
+ if shift >= 0 && shift < 16 {
+ // arm64:"ASR",-"CSEL"
+ r3 = val16 >> shift
+ }
+ if shift >= 0 && shift < 8 {
+ // arm64:"ASR",-"CSEL"
+ r4 = val8 >> shift
+ }
+ return r1, r2, r3, r4
+}
+
+func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
+
+ // ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f := tab[byte(v)^b]
+ // ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[byte(v)&b]
+ // ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[byte(v)|b]
+ // ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[uint16(v)&h]
+ // ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[uint16(v)^h]
+ // ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[uint16(v)|h]
+ // ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
+ f += tab[v&0xff]
+ // ppc64x:-".*AND",".*CLRLSLWI"
+ f += 2 * uint32(uint16(d))
+ // ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
+ g := 2 * uint64(uint32(d))
+ return f, g
+}
+
+func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) {
+
+ // ppc64x:-"AND","CLRLSLWI"
+ f := (v8 & 0xF) << 2
+ // ppc64x:"CLRLSLWI"
+ f += byte(v16) << 3
+ // ppc64x:-"AND","CLRLSLWI"
+ g := (v16 & 0xFF) << 3
+ // ppc64x:-"AND","CLRLSLWI"
+ h := (v32 & 0xFFFFF) << 2
+ // ppc64x:"CLRLSLDI"
+ i := (v64 & 0xFFFFFFFF) << 5
+ // ppc64x:-"CLRLSLDI"
+ i += (v64 & 0xFFFFFFF) << 38
+ // ppc64x/power9:-"CLRLSLDI"
+ i += (v64 & 0xFFFF00) << 10
+ // ppc64x/power9:-"SLD","EXTSWSLI"
+ j := int64(x32+32) * 8
+ return f, g, h, i, j
+}
+
+func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
+
+ // ppc64x:-".*MOVW"
+ f := int32(v >> 32)
+ // ppc64x:".*MOVW"
+ f += int32(v >> 31)
+ // ppc64x:-".*MOVH"
+ g := int16(v >> 48)
+ // ppc64x:".*MOVH"
+ g += int16(v >> 30)
+ // ppc64x:-".*MOVH"
+ g += int16(f >> 16)
+ // ppc64x:-".*MOVB"
+ h := int8(v >> 56)
+ // ppc64x:".*MOVB"
+ h += int8(v >> 28)
+ // ppc64x:-".*MOVB"
+ h += int8(f >> 24)
+ // ppc64x:".*MOVB"
+ h += int8(f >> 16)
+ return int64(h), uint64(g)
+}
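
The MOVW/MOVH/MOVB checks above separate shifts whose result already fits the narrower type from shifts that still need an explicit sign-extension: an arithmetic right shift of a 64-bit value by 48 leaves at most 16 significant bits, so truncating to int16 needs no extra MOVH, while a shift by 30 can still exceed the int16 range (and likewise for the MOVW and MOVB cases). A small standalone illustration of that boundary (not one of the tests):

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        v := int64(math.MaxInt64)
        fmt.Println(v>>48 <= math.MaxInt16) // true: v>>48 always fits in int16
        fmt.Println(v>>30 <= math.MaxInt16) // false: v>>30 can overflow int16, so sign-extension stays
    }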
+
+func checkShiftAndMask32(v []uint32) {
+ i := 0
+
+ // ppc64x: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
+ v[i] = (v[i] & 0xFF00000) >> 8
+ i++
+ // ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
+ v[i] = (v[i] & 0xFF00) >> 6
+ i++
+ // ppc64x: "MOVW\tR0"
+ v[i] = (v[i] & 0xFF) >> 8
+ i++
+ // ppc64x: "MOVW\tR0"
+ v[i] = (v[i] & 0xF000000) >> 28
+ i++
+ // ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
+ v[i] = (v[i] >> 6) & 0xFF
+ i++
+ // ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
+ v[i] = (v[i] >> 6) & 0xFF000
+ i++
+ // ppc64x: "MOVW\tR0"
+ v[i] = (v[i] >> 20) & 0xFF000
+ i++
+ // ppc64x: "MOVW\tR0"
+ v[i] = (v[i] >> 24) & 0xFF00
+ i++
+}
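
Four of the checks above expect a bare `MOVW\tR0` store: under the Go ppc64 ABI R0 holds zero, and in each of those cases the mask and the shift do not overlap, so the stored value is a compile-time constant zero. A quick standalone check of that arithmetic (illustrative only):

    package main

    import "fmt"

    func main() {
        v := uint32(0xFFFFFFFF) // worst case: every bit set
        fmt.Println((v&0xFF)>>8, (v&0xF000000)>>28)  // 0 0
        fmt.Println((v>>20)&0xFF000, (v>>24)&0xFF00) // 0 0
    }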
+
+func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
+ // ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
+ a[0] = a[uint8(v>>24)]
+ // ppc64x: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
+ b[0] = b[uint8(v>>24)]
+ // ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
+ b[1] = b[(v>>20)&0xFF]
+ // ppc64x: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
+ b[2] = b[v>>25]
+}
+
+// 128 bit shifts
+
+func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) {
+ s := bits & 63
+ ŝ := (64 - bits) & 63
+ // check that the shift operation has two commas (three operands)
+ // amd64:"SHRQ.*,.*,"
+ shr := x>>s | y<<ŝ
+ // amd64:"SHLQ.*,.*,"
+ shl := x<<s | y>>ŝ
+ return shr, shl
+}
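
The idiom being matched is the usual funnel shift for moving a 64-bit window across a 128-bit value. A hypothetical helper (not in this file) that spells the full operation out:

    // shiftRight128 returns (hi,lo) logically shifted right by n, for 0 < n < 64,
    // using the same x>>s | y<<(64-s) funnel idiom the checks above target.
    func shiftRight128(hi, lo uint64, n uint) (rhi, rlo uint64) {
        return hi >> n, lo>>n | hi<<(64-n)
    }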
+
+func checkShiftToMask(u []uint64, s []int64) {
+ // amd64:-"SHR",-"SHL","ANDQ"
+ u[0] = u[0] >> 5 << 5
+ // amd64:-"SAR",-"SHL","ANDQ"
+ s[0] = s[0] >> 5 << 5
+ // amd64:-"SHR",-"SHL","ANDQ"
+ u[1] = u[1] << 5 >> 5
+}
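
checkShiftToMask relies on the identity that a matched shift pair is just a bit mask, which is why each check expects a single ANDQ and no shift instructions:

    u >> 5 << 5  ==  u &^ 31          // clear the low five bits
    u << 5 >> 5  ==  u & (1<<59 - 1)  // clear the high five bits of a uint64

The first identity also holds for the signed s[0], since an arithmetic right shift floors toward negative infinity before the left shift multiplies back by 32.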
diff --git a/test/codegen/shortcircuit.go b/test/codegen/shortcircuit.go
new file mode 100644
index 0000000..e971dca
--- /dev/null
+++ b/test/codegen/shortcircuit.go
@@ -0,0 +1,17 @@
+// asmcheck
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func efaceExtract(e interface{}) int {
+ // This should be compiled with only
+ // a single conditional jump.
+ // amd64:-"JMP"
+ if x, ok := e.(int); ok {
+ return x
+ }
+ return 0
+}
diff --git a/test/codegen/slices.go b/test/codegen/slices.go
new file mode 100644
index 0000000..a38fe77
--- /dev/null
+++ b/test/codegen/slices.go
@@ -0,0 +1,426 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "unsafe"
+
+// This file contains code generation tests related to the handling of
+// slice types.
+
+// ------------------ //
+// Clear //
+// ------------------ //
+
+// Issue #5373: optimize the memset idiom.
+// Some of the clears get inlined; see #56997.
+
+func SliceClear(s []int) []int {
+ // amd64:`.*memclrNoHeapPointers`
+ // ppc64x:`.*memclrNoHeapPointers`
+ for i := range s {
+ s[i] = 0
+ }
+ return s
+}
+
+func SliceClearPointers(s []*int) []*int {
+ // amd64:`.*memclrHasPointers`
+ // ppc64x:`.*memclrHasPointers`
+ for i := range s {
+ s[i] = nil
+ }
+ return s
+}
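
Since Go 1.21 the same zeroing can also be written with the built-in clear, which the compiler recognizes in essentially the same way; a sketch under that assumption (not one of the tests in this file):

    func SliceClearBuiltin(s []int) []int {
        clear(s) // zero every element of s[0:len(s)]
        return s
    }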
+
+// ------------------ //
+// Extension //
+// ------------------ //
+
+// Issue #21266 - avoid makeslice in append(x, make([]T, y)...)
+
+func SliceExtensionConst(s []int) []int {
+ // amd64:-`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:-`.*runtime\.panicmakeslicelen`
+ // amd64:"MOVUPS\tX15"
+ // ppc64x:-`.*runtime\.memclrNoHeapPointers`
+ // ppc64x:-`.*runtime\.makeslice`
+ // ppc64x:-`.*runtime\.panicmakeslicelen`
+ return append(s, make([]int, 1<<2)...)
+}
+
+func SliceExtensionConstInt64(s []int) []int {
+ // amd64:-`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:-`.*runtime\.panicmakeslicelen`
+ // amd64:"MOVUPS\tX15"
+ // ppc64x:-`.*runtime\.memclrNoHeapPointers`
+ // ppc64x:-`.*runtime\.makeslice`
+ // ppc64x:-`.*runtime\.panicmakeslicelen`
+ return append(s, make([]int, int64(1<<2))...)
+}
+
+func SliceExtensionConstUint64(s []int) []int {
+ // amd64:-`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:-`.*runtime\.panicmakeslicelen`
+ // amd64:"MOVUPS\tX15"
+ // ppc64x:-`.*runtime\.memclrNoHeapPointers`
+ // ppc64x:-`.*runtime\.makeslice`
+ // ppc64x:-`.*runtime\.panicmakeslicelen`
+ return append(s, make([]int, uint64(1<<2))...)
+}
+
+func SliceExtensionConstUint(s []int) []int {
+ // amd64:-`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:-`.*runtime\.panicmakeslicelen`
+ // amd64:"MOVUPS\tX15"
+ // ppc64x:-`.*runtime\.memclrNoHeapPointers`
+ // ppc64x:-`.*runtime\.makeslice`
+ // ppc64x:-`.*runtime\.panicmakeslicelen`
+ return append(s, make([]int, uint(1<<2))...)
+}
+
+// On ppc64x continue to use memclrNoHeapPointers
+// for sizes >= 512.
+func SliceExtensionConst512(s []int) []int {
+ // amd64:-`.*runtime\.memclrNoHeapPointers`
+ // ppc64x:`.*runtime\.memclrNoHeapPointers`
+ return append(s, make([]int, 1<<9)...)
+}
+
+func SliceExtensionPointer(s []*int, l int) []*int {
+ // amd64:`.*runtime\.memclrHasPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // ppc64x:`.*runtime\.memclrHasPointers`
+ // ppc64x:-`.*runtime\.makeslice`
+ return append(s, make([]*int, l)...)
+}
+
+func SliceExtensionVar(s []byte, l int) []byte {
+ // amd64:`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // ppc64x:`.*runtime\.memclrNoHeapPointers`
+ // ppc64x:-`.*runtime\.makeslice`
+ return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarInt64(s []byte, l int64) []byte {
+ // amd64:`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:`.*runtime\.panicmakeslicelen`
+ return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarUint64(s []byte, l uint64) []byte {
+ // amd64:`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:`.*runtime\.panicmakeslicelen`
+ return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionVarUint(s []byte, l uint) []byte {
+ // amd64:`.*runtime\.memclrNoHeapPointers`
+ // amd64:-`.*runtime\.makeslice`
+ // amd64:`.*runtime\.panicmakeslicelen`
+ return append(s, make([]byte, l)...)
+}
+
+func SliceExtensionInt64(s []int, l64 int64) []int {
+ // 386:`.*runtime\.makeslice`
+ // 386:-`.*runtime\.memclr`
+ return append(s, make([]int, l64)...)
+}
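
As a usage note, the append-of-make idiom exercised by these Extension tests extends a slice by n zero-valued elements; the checks against runtime.makeslice verify that the temporary slice is never materialized. A small standalone example (not part of the tests):

    package main

    import "fmt"

    func main() {
        s := []int{1, 2, 3}
        s = append(s, make([]int, 4)...) // extend s by four zero elements
        fmt.Println(s)                   // [1 2 3 0 0 0 0]
    }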
+
+// ------------------ //
+// Make+Copy //
+// ------------------ //
+
+// Issue #26252 - avoid memclr for make+copy
+
+func SliceMakeCopyLen(s []int) []int {
+ // amd64:`.*runtime\.mallocgc`
+ // amd64:`.*runtime\.memmove`
+ // amd64:-`.*runtime\.makeslice`
+ // ppc64x:`.*runtime\.mallocgc`
+ // ppc64x:`.*runtime\.memmove`
+ // ppc64x:-`.*runtime\.makeslice`
+ a := make([]int, len(s))
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyLenPtr(s []*int) []*int {
+ // amd64:`.*runtime\.makeslicecopy`
+ // amd64:-`.*runtime\.makeslice\(`
+ // amd64:-`.*runtime\.typedslicecopy
+ // ppc64x:`.*runtime\.makeslicecopy`
+ // ppc64x:-`.*runtime\.makeslice\(`
+ // ppc64x:-`.*runtime\.typedslicecopy
+ a := make([]*int, len(s))
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyConst(s []int) []int {
+ // amd64:`.*runtime\.makeslicecopy`
+ // amd64:-`.*runtime\.makeslice\(`
+ // amd64:-`.*runtime\.memmove`
+ a := make([]int, 4)
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyConstPtr(s []*int) []*int {
+ // amd64:`.*runtime\.makeslicecopy`
+ // amd64:-`.*runtime\.makeslice\(`
+ // amd64:-`.*runtime\.typedslicecopy
+ a := make([]*int, 4)
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyNoOptNoDeref(s []*int) []*int {
+ a := new([]*int)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ *a = make([]*int, 4)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.typedslicecopy`
+ copy(*a, s)
+ return *a
+}
+
+func SliceMakeCopyNoOptNoVar(s []*int) []*int {
+ a := make([][]*int, 1)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a[0] = make([]*int, 4)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.typedslicecopy`
+ copy(a[0], s)
+ return a[0]
+}
+
+func SliceMakeCopyNoOptBlank(s []*int) []*int {
+ var a []*int
+ // amd64:-`.*runtime\.makeslicecopy`
+ _ = make([]*int, 4)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.typedslicecopy`
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyNoOptNoMake(s []*int) []*int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:-`.*runtime\.objectnew`
+ a := *new([]*int)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.typedslicecopy`
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyNoOptNoHeapAlloc(s []*int) int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ a := make([]*int, 4)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.typedslicecopy`
+ copy(a, s)
+ return cap(a)
+}
+
+func SliceMakeCopyNoOptNoCap(s []*int) []*int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]*int, 0, 4)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.typedslicecopy`
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyNoOptNoCopy(s []*int) []*int {
+ copy := func(x, y []*int) {}
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]*int, 4)
+ // amd64:-`.*runtime\.makeslicecopy`
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyNoOptWrongOrder(s []*int) []*int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]*int, 4)
+ // amd64:`.*runtime\.typedslicecopy`
+ // amd64:-`.*runtime\.makeslicecopy`
+ copy(s, a)
+ return a
+}
+
+func SliceMakeCopyNoOptWrongAssign(s []*int) []*int {
+ var a []*int
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ s = make([]*int, 4)
+ // amd64:`.*runtime\.typedslicecopy`
+ // amd64:-`.*runtime\.makeslicecopy`
+ copy(a, s)
+ return s
+}
+
+func SliceMakeCopyNoOptCopyLength(s []*int) (int, []*int) {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]*int, 4)
+ // amd64:`.*runtime\.typedslicecopy`
+ // amd64:-`.*runtime\.makeslicecopy`
+ n := copy(a, s)
+ return n, a
+}
+
+func SliceMakeCopyNoOptSelfCopy(s []*int) []*int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]*int, 4)
+ // amd64:`.*runtime\.typedslicecopy`
+ // amd64:-`.*runtime\.makeslicecopy`
+ copy(a, a)
+ return a
+}
+
+func SliceMakeCopyNoOptTargetReference(s []*int) []*int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]*int, 4)
+ // amd64:`.*runtime\.typedslicecopy`
+ // amd64:-`.*runtime\.makeslicecopy`
+ copy(a, s[:len(a)])
+ return a
+}
+
+func SliceMakeCopyNoOptCap(s []int) []int {
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.makeslice\(`
+ a := make([]int, len(s), 9)
+ // amd64:-`.*runtime\.makeslicecopy`
+ // amd64:`.*runtime\.memmove`
+ copy(a, s)
+ return a
+}
+
+func SliceMakeCopyNoMemmoveDifferentLen(s []int) []int {
+ // amd64:`.*runtime\.makeslicecopy`
+ // amd64:-`.*runtime\.memmove`
+ a := make([]int, len(s)-1)
+ // amd64:-`.*runtime\.memmove`
+ copy(a, s)
+ return a
+}
+
+func SliceMakeEmptyPointerToZerobase() []int {
+ // amd64:`LEAQ.+runtime\.zerobase`
+ // amd64:-`.*runtime\.makeslice`
+ return make([]int, 0)
+}
+
+// ---------------------- //
+// Nil check of &s[0] //
+// ---------------------- //
+// See issue 30366
+func SliceNilCheck(s []int) {
+ p := &s[0]
+ // amd64:-`TESTB`
+ _ = *p
+}
+
+// ---------------------- //
+// Init slice literal //
+// ---------------------- //
+// See issue 21561
+func InitSmallSliceLiteral() []int {
+ // amd64:`MOVQ\t[$]42`
+ return []int{42}
+}
+
+func InitNotSmallSliceLiteral() []int {
+ // amd64:`LEAQ\t.*stmp_`
+ return []int{
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ }
+}
+
+// --------------------------------------- //
+// Test PPC64 SUBFCconst folding rules //
+// triggered by slice operations. //
+// --------------------------------------- //
+
+func SliceWithConstCompare(a []int, b int) []int {
+ var c []int = []int{1, 2, 3, 4, 5}
+ if b+len(a) < len(c) {
+ // ppc64x:-"NEG"
+ return c[b:]
+ }
+ return a
+}
+
+func SliceWithSubtractBound(a []int, b int) []int {
+ // ppc64x:"SUBC",-"NEG"
+ return a[(3 - b):]
+}
+
+// --------------------------------------- //
+// Code generation for unsafe.Slice //
+// --------------------------------------- //
+
+func Slice1(p *byte, i int) []byte {
+ // amd64:-"MULQ"
+ return unsafe.Slice(p, i)
+}
+func Slice0(p *struct{}, i int) []struct{} {
+ // amd64:-"MULQ"
+ return unsafe.Slice(p, i)
+}
diff --git a/test/codegen/smallintiface.go b/test/codegen/smallintiface.go
new file mode 100644
index 0000000..0207a0a
--- /dev/null
+++ b/test/codegen/smallintiface.go
@@ -0,0 +1,22 @@
+// asmcheck
+
+package codegen
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+func booliface() interface{} {
+ // amd64:`LEAQ\truntime.staticuint64s\+8\(SB\)`
+ return true
+}
+
+func smallint8iface() interface{} {
+ // amd64:`LEAQ\truntime.staticuint64s\+2024\(SB\)`
+ return int8(-3)
+}
+
+func smalluint8iface() interface{} {
+ // amd64:`LEAQ\truntime.staticuint64s\+24\(SB\)`
+ return uint8(3)
+}
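
Reading these checks, the offsets are 8-byte indexes into runtime.staticuint64s, a read-only table of the 256 uint64 values 0..255 that lets small integers be boxed into interfaces without allocating. Reproducing the offsets (illustrative only, not part of the file):

    package main

    import "fmt"

    func main() {
        fmt.Println(8 * 1) // 8: offset of the value 1, used for true
        fmt.Println(8 * 3) // 24: offset of the value 3, used for uint8(3)
        b := int8(-3)
        fmt.Println(8 * int(uint8(b))) // 2024: int8(-3) is stored as the byte 0xFD == 253
    }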
diff --git a/test/codegen/spectre.go b/test/codegen/spectre.go
new file mode 100644
index 0000000..1b22b77
--- /dev/null
+++ b/test/codegen/spectre.go
@@ -0,0 +1,39 @@
+// asmcheck -gcflags=-spectre=index
+
+//go:build amd64
+
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func IndexArray(x *[10]int, i int) int {
+ // amd64:`CMOVQCC`
+ return x[i]
+}
+
+func IndexString(x string, i int) byte {
+ // amd64:`CMOVQ(LS|CC)`
+ return x[i]
+}
+
+func IndexSlice(x []float64, i int) float64 {
+ // amd64:`CMOVQ(LS|CC)`
+ return x[i]
+}
+
+func SliceArray(x *[10]int, i, j int) []int {
+ // amd64:`CMOVQHI`
+ return x[i:j]
+}
+
+func SliceString(x string, i, j int) string {
+ // amd64:`CMOVQHI`
+ return x[i:j]
+}
+
+func SliceSlice(x []float64, i, j int) []float64 {
+ // amd64:`CMOVQHI`
+ return x[i:j]
+}
diff --git a/test/codegen/stack.go b/test/codegen/stack.go
new file mode 100644
index 0000000..eebbbf1
--- /dev/null
+++ b/test/codegen/stack.go
@@ -0,0 +1,115 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+import "runtime"
+
+// This file contains code generation tests related to the use of the
+// stack.
+
+// Check that stack stores are optimized away.
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]-4-"
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func StackStore() int {
+ var x int
+ return *(&x)
+}
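
These TEXT checks match the symbol header the compiler prints for each function, whose trailing field has the shape $framesize-argsize; pinning the match to `[$]0-` therefore asserts a zero-byte stack frame, i.e. the local x never reaches the stack (on arm and mips, $-4 appears to mark a leaf frame that does not even reserve the return-address word). A schematic header line, with placeholders rather than captured output:

    TEXT	<pkg>.StackStore(SB), <flags>, $0-<argsize>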
+
+type T struct {
+ A, B, C, D int // keep exported fields
+ x, y, z int // reset unexported fields
+}
+
+// Check that large structs are cleared directly (issue #24416).
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ZeroLargeStruct(x *T) {
+ t := T{}
+ *x = t
+}
+
+// Check that structs are partially initialised directly (issue #24386).
+
+// Notes:
+// - 386 fails due to spilling a register
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+// Note that 386 currently has to spill a register.
+func KeepWanted(t *T) {
+ *t = T{A: t.A, B: t.B, C: t.C, D: t.D}
+}
+
+// Check that small array operations avoid using the stack (issue #15925).
+
+// Notes:
+// - 386 fails due to spilling a register
+// - arm & mips fail due to softfloat calls
+// amd64:"TEXT\t.*, [$]0-"
+// arm64:"TEXT\t.*, [$]0-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ArrayAdd64(a, b [4]float64) [4]float64 {
+ return [4]float64{a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]}
+}
+
+// Check that small array initialization avoids using the stack.
+
+// 386:"TEXT\t.*, [$]0-"
+// amd64:"TEXT\t.*, [$]0-"
+// arm:"TEXT\t.*, [$]0-" (spills return address)
+// arm64:"TEXT\t.*, [$]0-"
+// mips:"TEXT\t.*, [$]-4-"
+// ppc64x:"TEXT\t.*, [$]0-"
+// s390x:"TEXT\t.*, [$]0-"
+func ArrayInit(i, j int) [4]int {
+ return [4]int{i, 0, j, 0}
+}
+
+// Check that assembly output has matching offset and base register
+// (issue #21064).
+
+func check_asmout(b [2]int) int {
+ runtime.GC() // use some frame
+ // amd64:`.*b\+24\(SP\)`
+ // arm:`.*b\+4\(FP\)`
+ return b[1]
+}
+
+// Check that simple functions get promoted to nosplit, even when
+// they might panic in various ways. See issue 31219.
+// amd64:"TEXT\t.*NOSPLIT.*"
+func MightPanic(a []int, i, j, k, s int) {
+ _ = a[i] // panicIndex
+ _ = a[i:j] // panicSlice
+ _ = a[i:j:k] // also panicSlice
+ _ = i << s // panicShift
+ _ = i / j // panicDivide
+}
+
+// Put a defer in a loop, so the second defer is not open-coded.
+func Defer() {
+ for i := 0; i < 2; i++ {
+ defer func() {}()
+ }
+ // amd64:`CALL\truntime\.deferprocStack`
+ defer func() {}()
+}
diff --git a/test/codegen/strings.go b/test/codegen/strings.go
new file mode 100644
index 0000000..f98c062
--- /dev/null
+++ b/test/codegen/strings.go
@@ -0,0 +1,80 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// string types.
+
+func CountRunes(s string) int { // Issue #24923
+ // amd64:`.*countrunes`
+ return len([]rune(s))
+}
+
+func CountBytes(s []byte) int {
+ // amd64:-`.*runtime.slicebytetostring`
+ return len(string(s))
+}
+
+func ToByteSlice() []byte { // Issue #24698
+ // amd64:`LEAQ\ttype:\[3\]uint8`
+ // amd64:`CALL\truntime\.newobject`
+ // amd64:-`.*runtime.stringtoslicebyte`
+ return []byte("foo")
+}
+
+// Loading from read-only symbols should get transformed into constants.
+func ConstantLoad() {
+ // 12592 = 0x3130
+ // 50 = 0x32
+ // amd64:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(`
+ // 386:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(`
+ // arm:`MOVW\t\$48`,`MOVW\t\$49`,`MOVW\t\$50`
+ // arm64:`MOVD\t\$12592`,`MOVD\t\$50`
+ // wasm:`I64Const\t\$12592`,`I64Store16\t\$0`,`I64Const\t\$50`,`I64Store8\t\$2`
+ // mips64:`MOVV\t\$48`,`MOVV\t\$49`,`MOVV\t\$50`
+ bsink = []byte("012")
+
+ // 858927408 = 0x33323130
+ // 13620 = 0x3534
+ // amd64:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(`
+ // 386:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(`
+ // arm64:`MOVD\t\$858927408`,`MOVD\t\$13620`
+ // wasm:`I64Const\t\$858927408`,`I64Store32\t\$0`,`I64Const\t\$13620`,`I64Store16\t\$4`
+ bsink = []byte("012345")
+
+ // 3978425819141910832 = 0x3736353433323130
+ // 7306073769690871863 = 0x6564636261393837
+ // amd64:`MOVQ\t\$3978425819141910832`,`MOVQ\t\$7306073769690871863`
+ // 386:`MOVL\t\$858927408, \(`,`DUFFCOPY`
+ // arm64:`MOVD\t\$3978425819141910832`,`MOVD\t\$7306073769690871863`,`MOVD\t\$15`
+ // wasm:`I64Const\t\$3978425819141910832`,`I64Store\t\$0`,`I64Const\t\$7306073769690871863`,`I64Store\t\$7`
+ bsink = []byte("0123456789abcde")
+
+ // 56 = 0x38
+ // amd64:`MOVQ\t\$3978425819141910832`,`MOVB\t\$56`
+ bsink = []byte("012345678")
+
+ // 14648 = 0x3938
+ // amd64:`MOVQ\t\$3978425819141910832`,`MOVW\t\$14648`
+ bsink = []byte("0123456789")
+
+ // 1650538808 = 0x62613938
+ // amd64:`MOVQ\t\$3978425819141910832`,`MOVL\t\$1650538808`
+ bsink = []byte("0123456789ab")
+}
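
The hexadecimal notes inside ConstantLoad are simply the string bytes packed little-endian; they can be reproduced directly (standalone, illustrative only):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    func main() {
        b := []byte("0123456789abcde")
        fmt.Println(binary.LittleEndian.Uint16(b)) // 12592 == 0x3130, the bytes "01"
        fmt.Println(b[2])                          // 50 == 0x32, the byte '2'
        fmt.Println(binary.LittleEndian.Uint32(b)) // 858927408 == 0x33323130, the bytes "0123"
        fmt.Println(binary.LittleEndian.Uint64(b)) // 3978425819141910832 == 0x3736353433323130
    }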
+
+// Self-equality is always true. See issue 60777.
+func EqualSelf(s string) bool {
+ // amd64:`MOVL\t\$1, AX`,-`.*memequal.*`
+ return s == s
+}
+func NotEqualSelf(s string) bool {
+ // amd64:`XORL\tAX, AX`,-`.*memequal.*`
+ return s != s
+}
+
+var bsink []byte
diff --git a/test/codegen/structs.go b/test/codegen/structs.go
new file mode 100644
index 0000000..49a201f
--- /dev/null
+++ b/test/codegen/structs.go
@@ -0,0 +1,48 @@
+// asmcheck
+
+//go:build !goexperiment.cgocheck2
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// This file contains code generation tests related to the handling of
+// struct types.
+
+// ------------- //
+// Zeroing //
+// ------------- //
+
+type Z1 struct {
+ a, b, c int
+}
+
+func Zero1(t *Z1) { // Issue #18370
+ // amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
+ *t = Z1{}
+}
+
+type Z2 struct {
+ a, b, c *int
+}
+
+func Zero2(t *Z2) {
+ // amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)`
+ // amd64:`.*runtime[.]gcWriteBarrier.*\(SB\)`
+ *t = Z2{}
+}
+
+// ------------------ //
+// Initializing //
+// ------------------ //
+
+type I1 struct {
+ a, b, c, d int
+}
+
+func Init1(p *I1) { // Issue #18872
+ // amd64:`MOVQ\t[$]1`,`MOVQ\t[$]2`,`MOVQ\t[$]3`,`MOVQ\t[$]4`
+ *p = I1{1, 2, 3, 4}
+}
diff --git a/test/codegen/switch.go b/test/codegen/switch.go
new file mode 100644
index 0000000..980ea70
--- /dev/null
+++ b/test/codegen/switch.go
@@ -0,0 +1,185 @@
+// asmcheck
+
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// These tests check code generation of switch statements.
+
+package codegen
+
+// see issue 33934
+func f(x string) int {
+ // amd64:-`cmpstring`
+ switch x {
+ case "":
+ return -1
+ case "1", "2", "3":
+ return -2
+ default:
+ return -3
+ }
+}
+
+// use jump tables for 8+ int cases
+func square(x int) int {
+ // amd64:`JMP\s\(.*\)\(.*\)$`
+ // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
+ switch x {
+ case 1:
+ return 1
+ case 2:
+ return 4
+ case 3:
+ return 9
+ case 4:
+ return 16
+ case 5:
+ return 25
+ case 6:
+ return 36
+ case 7:
+ return 49
+ case 8:
+ return 64
+ default:
+ return x * x
+ }
+}
+
+// use jump tables for 8+ string lengths
+func length(x string) int {
+ // amd64:`JMP\s\(.*\)\(.*\)$`
+ // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
+ switch x {
+ case "a":
+ return 1
+ case "bb":
+ return 2
+ case "ccc":
+ return 3
+ case "dddd":
+ return 4
+ case "eeeee":
+ return 5
+ case "ffffff":
+ return 6
+ case "ggggggg":
+ return 7
+ case "hhhhhhhh":
+ return 8
+ default:
+ return len(x)
+ }
+}
+
+// Use single-byte ordered comparisons for binary searching strings.
+// See issue 53333.
+func mimetype(ext string) string {
+ // amd64: `CMPB\s1\(.*\), \$104$`,-`cmpstring`
+ // arm64: `MOVB\s1\(R.*\), R.*$`, `CMPW\s\$104, R.*$`, -`cmpstring`
+ switch ext {
+ // amd64: `CMPL\s\(.*\), \$1836345390$`
+ // arm64: `MOVD\s\$1836345390`, `CMPW\sR.*, R.*$`
+ case ".htm":
+ return "A"
+ // amd64: `CMPL\s\(.*\), \$1953457454$`
+ // arm64: `MOVD\s\$1953457454`, `CMPW\sR.*, R.*$`
+ case ".eot":
+ return "B"
+ // amd64: `CMPL\s\(.*\), \$1735815982$`
+ // arm64: `MOVD\s\$1735815982`, `CMPW\sR.*, R.*$`
+ case ".svg":
+ return "C"
+ // amd64: `CMPL\s\(.*\), \$1718907950$`
+ // arm64: `MOVD\s\$1718907950`, `CMPW\sR.*, R.*$`
+ case ".ttf":
+ return "D"
+ default:
+ return ""
+ }
+}
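
The magic numbers in the mimetype checks are the four extension strings read as little-endian 32-bit words, and the $104 in the CMPB check is 'h', one of the single-byte pivots the generated comparison tree branches on. Reproducing the constants (standalone, illustrative only):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    func main() {
        for _, ext := range []string{".htm", ".eot", ".svg", ".ttf"} {
            fmt.Println(ext, binary.LittleEndian.Uint32([]byte(ext)))
        }
        // prints 1836345390, 1953457454, 1735815982, 1718907950
        fmt.Println("h"[0]) // 104
    }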
+
+// use jump tables for type switches to concrete types.
+func typeSwitch(x any) int {
+ // amd64:`JMP\s\(.*\)\(.*\)$`
+ // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
+ switch x.(type) {
+ case int:
+ return 0
+ case int8:
+ return 1
+ case int16:
+ return 2
+ case int32:
+ return 3
+ case int64:
+ return 4
+ }
+ return 7
+}
+
+type I interface {
+ foo()
+}
+type J interface {
+ bar()
+}
+type IJ interface {
+ I
+ J
+}
+type K interface {
+ baz()
+}
+
+// use a runtime call for type switches to interface types.
+func interfaceSwitch(x any) int {
+ // amd64:`CALL\truntime.interfaceSwitch`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*8)`
+ // arm64:`CALL\truntime.interfaceSwitch`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+ switch x.(type) {
+ case I:
+ return 1
+ case J:
+ return 2
+ default:
+ return 3
+ }
+}
+
+func interfaceSwitch2(x K) int {
+ // amd64:`CALL\truntime.interfaceSwitch`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*8)`
+ // arm64:`CALL\truntime.interfaceSwitch`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+ switch x.(type) {
+ case I:
+ return 1
+ case J:
+ return 2
+ default:
+ return 3
+ }
+}
+
+func interfaceCast(x any) int {
+ // amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*1)`
+ // arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+ if _, ok := x.(I); ok {
+ return 3
+ }
+ return 5
+}
+
+func interfaceCast2(x K) int {
+ // amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*1)`
+ // arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+ if _, ok := x.(I); ok {
+ return 3
+ }
+ return 5
+}
+
+func interfaceConv(x IJ) I {
+ // amd64:`CALL\truntime.typeAssert`,`MOVL\t16\(AX\)`,`MOVQ\t8\(.*\)(.*\*1)`
+ // arm64:`CALL\truntime.typeAssert`,`LDAR`,`MOVWU\t16\(R0\)`,`MOVD\t\(R.*\)\(R.*\)`
+ return x
+}
diff --git a/test/codegen/writebarrier.go b/test/codegen/writebarrier.go
new file mode 100644
index 0000000..cfcfe15
--- /dev/null
+++ b/test/codegen/writebarrier.go
@@ -0,0 +1,55 @@
+// asmcheck
+
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func combine2string(p *[2]string, a, b string) {
+ // amd64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+ // arm64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+ p[0] = a
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[1] = b
+}
+
+func combine4string(p *[4]string, a, b, c, d string) {
+ // amd64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+ // arm64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+ p[0] = a
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[1] = b
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[2] = c
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[3] = d
+}
+
+func combine2slice(p *[2][]byte, a, b []byte) {
+ // amd64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+ // arm64:`.*runtime[.]gcWriteBarrier4\(SB\)`
+ p[0] = a
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[1] = b
+}
+
+func combine4slice(p *[4][]byte, a, b, c, d []byte) {
+ // amd64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+ // arm64:`.*runtime[.]gcWriteBarrier8\(SB\)`
+ p[0] = a
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[1] = b
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[2] = c
+ // amd64:-`.*runtime[.]gcWriteBarrier`
+ // arm64:-`.*runtime[.]gcWriteBarrier`
+ p[3] = d
+}
diff --git a/test/codegen/zerosize.go b/test/codegen/zerosize.go
new file mode 100644
index 0000000..ecf3305
--- /dev/null
+++ b/test/codegen/zerosize.go
@@ -0,0 +1,25 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure a pointer variable and a zero-sized variable
+// aren't allocated to the same stack slot.
+// See issue 24993.
+
+package codegen
+
+func zeroSize() {
+ c := make(chan struct{})
+ // amd64:`MOVQ\t\$0, command-line-arguments\.s\+56\(SP\)`
+ var s *int
+ // force s to be a stack object, also use some (fixed) stack space
+ g(&s, 1, 2, 3, 4, 5)
+
+ // amd64:`LEAQ\tcommand-line-arguments\..*\+55\(SP\)`
+ c <- struct{}{}
+}
+
+//go:noinline
+func g(**int, int, int, int, int, int) {}