diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:23:18 +0000 |
commit | 43a123c1ae6613b3efeed291fa552ecd909d3acf (patch) | |
tree | fd92518b7024bc74031f78a1cf9e454b65e73665 /test/codegen | |
parent | Initial commit. (diff) | |
download | golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.tar.xz golang-1.20-43a123c1ae6613b3efeed291fa552ecd909d3acf.zip |
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
50 files changed, 9449 insertions, 0 deletions
diff --git a/test/codegen/README b/test/codegen/README new file mode 100644 index 0000000..b803fe5 --- /dev/null +++ b/test/codegen/README @@ -0,0 +1,153 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +The codegen directory contains code generation tests for the gc +compiler. + + +- Introduction + +The test harness compiles Go code inside files in this directory and +matches the generated assembly (the output of `go tool compile -S`) +against a set of regexps to be specified in comments that follow a +special syntax (described below). The test driver is implemented as a +step of the top-level test/run.go suite, called "asmcheck". + +The codegen harness is part of the all.bash test suite, but for +performance reasons only the codegen tests for the host machine's +GOARCH are enabled by default, and only on GOOS=linux. + +To perform comprehensive tests for all the supported architectures +(even on a non-Linux system), one can run the following command + + $ ../bin/go run run.go -all_codegen -v codegen + +in the top-level test directory. This is recommended after any change +that affects the compiler's code. + +The test harness compiles the tests with the same go toolchain that is +used to run run.go. After writing tests for a newly added codegen +transformation, it can be useful to first run the test harness with a +toolchain from a released Go version (and verify that the new tests +fail), and then re-run the tests using the devel toolchain. + + +- Regexps comments syntax + +Instructions to match are specified inside plain comments that start +with an architecture tag, followed by a colon and a quoted Go-style +regexp to be matched. 
For example, the following test: + + func Sqrt(x float64) float64 { + // amd64:"SQRTSD" + // arm64:"FSQRTD" + return math.Sqrt(x) + } + +verifies that math.Sqrt calls are intrinsified to a SQRTSD instruction +on amd64, and to a FSQRTD instruction on arm64. + +It is possible to put multiple architecture checks into the same +line, as: + + // amd64:"SQRTSD" arm64:"FSQRTD" + +although this form should be avoided when doing so would make the +regexps line excessively long and difficult to read. + +Comments that are on their own line will be matched against the first +subsequent non-comment line. Inline comments are also supported; the +regexp will be matched against the code found on the same line: + + func Sqrt(x float64) float64 { + return math.Sqrt(x) // arm:"SQRTD" + } + +It's possible to specify a comma-separated list of regexps to be +matched. For example, the following test: + + func TZ8(n uint8) int { + // amd64:"BSFQ","ORQ\t\\$256" + return bits.TrailingZeros8(n) + } + +verifies that the code generated for a bits.TrailingZeros8 call on +amd64 contains both a "BSFQ" instruction and an "ORQ $256". + +Note how the ORQ regex includes a tab char (\t). In the Go assembly +syntax, operands are separated from opcodes by a tab character. + +Regexps can be quoted using either " or `. Special characters must be +escaped accordingly. Both of these are accepted, and equivalent: + + // amd64:"ADDQ\t\\$3" + // amd64:`ADDQ\t\$3` + +and they'll match this assembly line: + + ADDQ $3 + +Negative matches can be specified using a - before the quoted regexp. +For example: + + func MoveSmall() { + x := [...]byte{1, 2, 3, 4, 5, 6, 7} + copy(x[1:], x[:]) // arm64:-".*memmove" + } + +verifies that NO memmove call is present in the assembly generated for +the copy() line. + + +- Architecture specifiers + +There are three different ways to specify on which architecture a test +should be run: + +* Specify only the architecture (eg: "amd64"). 
This indicates that the + check should be run on all the supported architecture variants. For + instance, arm checks will be run against all supported GOARM + variations (5,6,7). +* Specify both the architecture and a variant, separated by a slash + (eg: "arm/7"). This means that the check will be run only on that + specific variant. +* Specify the operating system, the architecture and the variant, + separated by slashes (eg: "plan9/386/sse2", "plan9/amd64/"). This is + needed in the rare case that you need to do a codegen test affected + by a specific operating system; by default, tests are compiled only + targeting linux. + + +- Remarks, and Caveats + +-- Write small test functions + +As a general guideline, test functions should be small, to avoid +possible interactions between unrelated lines of code that may be +introduced, for example, by the compiler's optimization passes. + +Any given line of Go code could get assigned more instructions than it +may appear from reading the source. In particular, matching all MOV +instructions should be avoided; the compiler may add them for +unrelated reasons and this may render the test ineffective. + +-- Line matching logic + +Regexps are always matched from the start of the instructions line. +This means, for example, that the "MULQ" regexp is equivalent to +"^MULQ" (^ representing the start of the line), and it will NOT match +the following assembly line: + + IMULQ $99, AX + +To force a match at any point of the line, ".*MULQ" should be used. + +For the same reason, a negative regexp like -"memmove" is not enough +to make sure that no memmove call is included in the assembly. A +memmove call looks like this: + + CALL runtime.memmove(SB) + +To make sure that the "memmove" symbol does not appear anywhere in the +assembly, the negative regexp to be used is -".*memmove". 
diff --git a/test/codegen/addrcalc.go b/test/codegen/addrcalc.go new file mode 100644 index 0000000..45552d2 --- /dev/null +++ b/test/codegen/addrcalc.go @@ -0,0 +1,14 @@ +// asmcheck + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// Make sure we use ADDQ instead of LEAQ when we can. + +func f(p *[4][2]int, x int) *int { + // amd64:"ADDQ",-"LEAQ" + return &p[x][0] +} diff --git a/test/codegen/alloc.go b/test/codegen/alloc.go new file mode 100644 index 0000000..31455fd --- /dev/null +++ b/test/codegen/alloc.go @@ -0,0 +1,34 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// These tests check that allocating a 0-size object does not +// introduce a call to runtime.newobject. + +package codegen + +func zeroAllocNew1() *struct{} { + // 386:-`CALL\truntime\.newobject` + // amd64:-`CALL\truntime\.newobject` + // arm:-`CALL\truntime\.newobject` + // arm64:-`CALL\truntime\.newobject` + return new(struct{}) +} + +func zeroAllocNew2() *[0]int { + // 386:-`CALL\truntime\.newobject` + // amd64:-`CALL\truntime\.newobject` + // arm:-`CALL\truntime\.newobject` + // arm64:-`CALL\truntime\.newobject` + return new([0]int) +} + +func zeroAllocSliceLit() []int { + // 386:-`CALL\truntime\.newobject` + // amd64:-`CALL\truntime\.newobject` + // arm:-`CALL\truntime\.newobject` + // arm64:-`CALL\truntime\.newobject` + return []int{} +} diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go new file mode 100644 index 0000000..327be24 --- /dev/null +++ b/test/codegen/arithmetic.go @@ -0,0 +1,613 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +// This file contains codegen tests related to arithmetic +// simplifications and optimizations on integer types. +// For codegen tests on float types, see floats.go. + +// ----------------- // +// Subtraction // +// ----------------- // + +var ef int + +func SubMem(arr []int, b, c, d int) int { + // 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)` + // amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)` + arr[2] -= b + // 386:`SUBL\s[A-Z]+,\s12\([A-Z]+\)` + // amd64:`SUBQ\s[A-Z]+,\s24\([A-Z]+\)` + arr[3] -= b + // 386:`DECL\s16\([A-Z]+\)` + arr[4]-- + // 386:`ADDL\s[$]-20,\s20\([A-Z]+\)` + arr[5] -= 20 + // 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+` + ef -= arr[b] + // 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)` + arr[c] -= b + // 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)` + arr[d] -= 15 + // 386:`DECL\s\([A-Z]+\)\([A-Z]+\*4\)` + arr[b]-- + // amd64:`DECQ\s64\([A-Z]+\)` + arr[8]-- + // 386:"SUBL\t4" + // amd64:"SUBQ\t8" + return arr[0] - arr[1] +} + +func SubFromConst(a int) int { + // ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR` + b := 40 - a + return b +} + +func SubFromConstNeg(a int) int { + // ppc64le: `ADD\t[$]40,\sR[0-9]+,\sR` + // ppc64: `ADD\t[$]40,\sR[0-9]+,\sR` + c := 40 - (-a) + return c +} + +func SubSubFromConst(a int) int { + // ppc64le: `ADD\t[$]20,\sR[0-9]+,\sR` + // ppc64: `ADD\t[$]20,\sR[0-9]+,\sR` + c := 40 - (20 - a) + return c +} + +func AddSubFromConst(a int) int { + // ppc64le: `SUBC\tR[0-9]+,\s[$]60,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]60,\sR` + c := 40 + (20 - a) + return c +} + +func NegSubFromConst(a int) int { + // ppc64le: `ADD\t[$]-20,\sR[0-9]+,\sR` + // ppc64: `ADD\t[$]-20,\sR[0-9]+,\sR` + c := -(20 - a) + return c +} + +func NegAddFromConstNeg(a int) int { + // ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR` + c := -(-40 + a) + return c +} + +func SubSubNegSimplify(a, b int) int { + // amd64:"NEGQ" + // ppc64:"NEG" + // ppc64le:"NEG" + r := (a - b) - a + return r +} + +func 
SubAddSimplify(a, b int) int { + // amd64:-"SUBQ",-"ADDQ" + // ppc64:-"SUB",-"ADD" + // ppc64le:-"SUB",-"ADD" + r := a + (b - a) + return r +} + +func SubAddNegSimplify(a, b int) int { + // amd64:"NEGQ",-"ADDQ",-"SUBQ" + // ppc64:"NEG",-"ADD",-"SUB" + // ppc64le:"NEG",-"ADD",-"SUB" + r := a - (b + a) + return r +} + +func AddAddSubSimplify(a, b, c int) int { + // amd64:-"SUBQ" + // ppc64:-"SUB" + // ppc64le:-"SUB" + r := a + (b + (c - a)) + return r +} + +// -------------------- // +// Multiplication // +// -------------------- // + +func Pow2Muls(n1, n2 int) (int, int) { + // amd64:"SHLQ\t[$]5",-"IMULQ" + // 386:"SHLL\t[$]5",-"IMULL" + // arm:"SLL\t[$]5",-"MUL" + // arm64:"LSL\t[$]5",-"MUL" + // ppc64:"SLD\t[$]5",-"MUL" + // ppc64le:"SLD\t[$]5",-"MUL" + a := n1 * 32 + + // amd64:"SHLQ\t[$]6",-"IMULQ" + // 386:"SHLL\t[$]6",-"IMULL" + // arm:"SLL\t[$]6",-"MUL" + // arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL` + // ppc64:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL" + // ppc64le:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL" + b := -64 * n2 + + return a, b +} + +func Mul_96(n int) int { + // amd64:`SHLQ\t[$]5`,`LEAQ\t\(.*\)\(.*\*2\),`,-`IMULQ` + // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL` + // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL` + // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL` + // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD` + return n * 96 +} + +func Mul_n120(n int) int { + // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD` + return n * -120 +} + +func MulMemSrc(a []uint32, b []float32) { + // 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+` + a[0] *= a[1] + // 386/sse2:`MULSS\s4\([A-Z]+\),\sX[0-9]+` + // amd64:`MULSS\s4\([A-Z]+\),\sX[0-9]+` + b[0] *= b[1] +} + +// Multiplications merging tests + +func MergeMuls1(n int) int { + // amd64:"IMUL3Q\t[$]46" + // 386:"IMUL3L\t[$]46" + // ppc64le:"MULLD\t[$]46" + // ppc64:"MULLD\t[$]46" + return 15*n + 31*n // 46n +} + +func MergeMuls2(n int) int { + // amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)" + // 
386:"IMUL3L\t[$]23","ADDL\t[$]29" + // ppc64le/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29" + // ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29" + return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29 +} + +func MergeMuls3(a, n int) int { + // amd64:"ADDQ\t[$]19",-"IMULQ\t[$]19" + // 386:"ADDL\t[$]19",-"IMULL\t[$]19" + // ppc64:"ADD\t[$]19",-"MULLD\t[$]19" + // ppc64le:"ADD\t[$]19",-"MULLD\t[$]19" + return a*n + 19*n // (a+19)n +} + +func MergeMuls4(n int) int { + // amd64:"IMUL3Q\t[$]14" + // 386:"IMUL3L\t[$]14" + // ppc64:"MULLD\t[$]14" + // ppc64le:"MULLD\t[$]14" + return 23*n - 9*n // 14n +} + +func MergeMuls5(a, n int) int { + // amd64:"ADDQ\t[$]-19",-"IMULQ\t[$]19" + // 386:"ADDL\t[$]-19",-"IMULL\t[$]19" + // ppc64:"ADD\t[$]-19",-"MULLD\t[$]19" + // ppc64le:"ADD\t[$]-19",-"MULLD\t[$]19" + return a*n - 19*n // (a-19)n +} + +// -------------- // +// Division // +// -------------- // + +func DivMemSrc(a []float64) { + // 386/sse2:`DIVSD\s8\([A-Z]+\),\sX[0-9]+` + // amd64:`DIVSD\s8\([A-Z]+\),\sX[0-9]+` + a[0] /= a[1] +} + +func Pow2Divs(n1 uint, n2 int) (uint, int) { + // 386:"SHRL\t[$]5",-"DIVL" + // amd64:"SHRQ\t[$]5",-"DIVQ" + // arm:"SRL\t[$]5",-".*udiv" + // arm64:"LSR\t[$]5",-"UDIV" + // ppc64:"SRD" + // ppc64le:"SRD" + a := n1 / 32 // unsigned + + // amd64:"SARQ\t[$]6",-"IDIVQ" + // 386:"SARL\t[$]6",-"IDIVL" + // arm:"SRA\t[$]6",-".*udiv" + // arm64:"ASR\t[$]6",-"SDIV" + // ppc64:"SRAD" + // ppc64le:"SRAD" + b := n2 / 64 // signed + + return a, b +} + +// Check that constant divisions get turned into MULs +func ConstDivs(n1 uint, n2 int) (uint, int) { + // amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ" + // 386:"MOVL\t[$]-252645135","MULL",-"DIVL" + // arm64:`MOVD`,`UMULH`,-`DIV` + // arm:`MOVW`,`MUL`,-`.*udiv` + a := n1 / 17 // unsigned + + // amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ" + // 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL" + // arm64:`SMULH`,-`DIV` + // arm:`MOVW`,`MUL`,-`.*udiv` + b := n2 / 17 // signed + + return a, b +} + +func 
FloatDivs(a []float32) float32 { + // amd64:`DIVSS\s8\([A-Z]+\),\sX[0-9]+` + // 386/sse2:`DIVSS\s8\([A-Z]+\),\sX[0-9]+` + return a[1] / a[2] +} + +func Pow2Mods(n1 uint, n2 int) (uint, int) { + // 386:"ANDL\t[$]31",-"DIVL" + // amd64:"ANDL\t[$]31",-"DIVQ" + // arm:"AND\t[$]31",-".*udiv" + // arm64:"AND\t[$]31",-"UDIV" + // ppc64:"ANDCC\t[$]31" + // ppc64le:"ANDCC\t[$]31" + a := n1 % 32 // unsigned + + // 386:"SHRL",-"IDIVL" + // amd64:"SHRQ",-"IDIVQ" + // arm:"SRA",-".*udiv" + // arm64:"ASR",-"REM" + // ppc64:"SRAD" + // ppc64le:"SRAD" + b := n2 % 64 // signed + + return a, b +} + +// Check that signed divisibility checks get converted to AND on low bits +func Pow2DivisibleSigned(n1, n2 int) (bool, bool) { + // 386:"TESTL\t[$]63",-"DIVL",-"SHRL" + // amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ" + // arm:"AND\t[$]63",-".*udiv",-"SRA" + // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND" + // ppc64:"ANDCC\t[$]63",-"SRAD" + // ppc64le:"ANDCC\t[$]63",-"SRAD" + a := n1%64 == 0 // signed divisible + + // 386:"TESTL\t[$]63",-"DIVL",-"SHRL" + // amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ" + // arm:"AND\t[$]63",-".*udiv",-"SRA" + // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND" + // ppc64:"ANDCC\t[$]63",-"SRAD" + // ppc64le:"ANDCC\t[$]63",-"SRAD" + b := n2%64 != 0 // signed indivisible + + return a, b +} + +// Check that constant modulo divs get turned into MULs +func ConstMods(n1 uint, n2 int) (uint, int) { + // amd64:"MOVQ\t[$]-1085102592571150095","MULQ",-"DIVQ" + // 386:"MOVL\t[$]-252645135","MULL",-"DIVL" + // arm64:`MOVD`,`UMULH`,-`DIV` + // arm:`MOVW`,`MUL`,-`.*udiv` + a := n1 % 17 // unsigned + + // amd64:"MOVQ\t[$]-1085102592571150095","IMULQ",-"IDIVQ" + // 386:"MOVL\t[$]-252645135","IMULL",-"IDIVL" + // arm64:`SMULH`,-`DIV` + // arm:`MOVW`,`MUL`,-`.*udiv` + b := n2 % 17 // signed + + return a, b +} + +// Check that divisibility checks x%c==0 are converted to MULs and rotates +func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) { + // 
amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ" + // 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVQ" + // arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV" + // arm:"MUL","CMP\t[$]715827882",-".*udiv" + // ppc64:"MULLD","ROTL\t[$]63" + // ppc64le:"MULLD","ROTL\t[$]63" + evenU := n1%6 == 0 + + // amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ" + // 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVQ" + // arm64:"MOVD\t[$]-8737931403336103397","MUL",-"ROR",-"DIV" + // arm:"MUL","CMP\t[$]226050910",-".*udiv" + // ppc64:"MULLD",-"ROTL" + // ppc64le:"MULLD",-"ROTL" + oddU := n1%19 == 0 + + // amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ" + // 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ" + // arm64:"MUL","ADD\tR","ROR",-"DIV" + // arm:"MUL","ADD\t[$]715827882",-".*udiv" + // ppc64/power8:"MULLD","ADD","ROTL\t[$]63" + // ppc64le/power8:"MULLD","ADD","ROTL\t[$]63" + // ppc64/power9:"MADDLD","ROTL\t[$]63" + // ppc64le/power9:"MADDLD","ROTL\t[$]63" + evenS := n2%6 == 0 + + // amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ" + // 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ" + // arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV" + // arm:"MUL","ADD\t[$]113025455",-".*udiv" + // ppc64/power8:"MULLD","ADD",-"ROTL" + // ppc64/power9:"MADDLD",-"ROTL" + // ppc64le/power8:"MULLD","ADD",-"ROTL" + // ppc64le/power9:"MADDLD",-"ROTL" + oddS := n2%19 == 0 + + return evenU, oddU, evenS, oddS +} + +// Check that fix-up code is not generated for divisions where it has been proven that +// that the divisor is not -1 or that the dividend is > MinIntNN. +func NoFix64A(divr int64) (int64, int64) { + var d int64 = 42 + var e int64 = 84 + if divr > 5 { + d /= divr // amd64:-"JMP" + e %= divr // amd64:-"JMP" + // The following statement is to avoid conflict between the above check + // and the normal JMP generated at the end of the block. 
+ d += e + } + return d, e +} + +func NoFix64B(divd int64) (int64, int64) { + var d int64 + var e int64 + var divr int64 = -1 + if divd > -9223372036854775808 { + d = divd / divr // amd64:-"JMP" + e = divd % divr // amd64:-"JMP" + d += e + } + return d, e +} + +func NoFix32A(divr int32) (int32, int32) { + var d int32 = 42 + var e int32 = 84 + if divr > 5 { + // amd64:-"JMP" + // 386:-"JMP" + d /= divr + // amd64:-"JMP" + // 386:-"JMP" + e %= divr + d += e + } + return d, e +} + +func NoFix32B(divd int32) (int32, int32) { + var d int32 + var e int32 + var divr int32 = -1 + if divd > -2147483648 { + // amd64:-"JMP" + // 386:-"JMP" + d = divd / divr + // amd64:-"JMP" + // 386:-"JMP" + e = divd % divr + d += e + } + return d, e +} + +func NoFix16A(divr int16) (int16, int16) { + var d int16 = 42 + var e int16 = 84 + if divr > 5 { + // amd64:-"JMP" + // 386:-"JMP" + d /= divr + // amd64:-"JMP" + // 386:-"JMP" + e %= divr + d += e + } + return d, e +} + +func NoFix16B(divd int16) (int16, int16) { + var d int16 + var e int16 + var divr int16 = -1 + if divd > -32768 { + // amd64:-"JMP" + // 386:-"JMP" + d = divd / divr + // amd64:-"JMP" + // 386:-"JMP" + e = divd % divr + d += e + } + return d, e +} + +// Check that len() and cap() calls divided by powers of two are +// optimized into shifts and ands + +func LenDiv1(a []int) int { + // 386:"SHRL\t[$]10" + // amd64:"SHRQ\t[$]10" + // arm64:"LSR\t[$]10",-"SDIV" + // arm:"SRL\t[$]10",-".*udiv" + // ppc64:"SRD"\t[$]10" + // ppc64le:"SRD"\t[$]10" + return len(a) / 1024 +} + +func LenDiv2(s string) int { + // 386:"SHRL\t[$]11" + // amd64:"SHRQ\t[$]11" + // arm64:"LSR\t[$]11",-"SDIV" + // arm:"SRL\t[$]11",-".*udiv" + // ppc64:"SRD\t[$]11" + // ppc64le:"SRD\t[$]11" + return len(s) / (4097 >> 1) +} + +func LenMod1(a []int) int { + // 386:"ANDL\t[$]1023" + // amd64:"ANDL\t[$]1023" + // arm64:"AND\t[$]1023",-"SDIV" + // arm/6:"AND",-".*udiv" + // arm/7:"BFC",-".*udiv",-"AND" + // ppc64:"ANDCC\t[$]1023" + // ppc64le:"ANDCC\t[$]1023" + 
return len(a) % 1024 +} + +func LenMod2(s string) int { + // 386:"ANDL\t[$]2047" + // amd64:"ANDL\t[$]2047" + // arm64:"AND\t[$]2047",-"SDIV" + // arm/6:"AND",-".*udiv" + // arm/7:"BFC",-".*udiv",-"AND" + // ppc64:"ANDCC\t[$]2047" + // ppc64le:"ANDCC\t[$]2047" + return len(s) % (4097 >> 1) +} + +func CapDiv(a []int) int { + // 386:"SHRL\t[$]12" + // amd64:"SHRQ\t[$]12" + // arm64:"LSR\t[$]12",-"SDIV" + // arm:"SRL\t[$]12",-".*udiv" + // ppc64:"SRD\t[$]12" + // ppc64le:"SRD\t[$]12" + return cap(a) / ((1 << 11) + 2048) +} + +func CapMod(a []int) int { + // 386:"ANDL\t[$]4095" + // amd64:"ANDL\t[$]4095" + // arm64:"AND\t[$]4095",-"SDIV" + // arm/6:"AND",-".*udiv" + // arm/7:"BFC",-".*udiv",-"AND" + // ppc64:"ANDCC\t[$]4095" + // ppc64le:"ANDCC\t[$]4095" + return cap(a) % ((1 << 11) + 2048) +} + +func AddMul(x int) int { + // amd64:"LEAQ\t1" + return 2*x + 1 +} + +func MULA(a, b, c uint32) (uint32, uint32, uint32) { + // arm:`MULA`,-`MUL\s` + // arm64:`MADDW`,-`MULW` + r0 := a*b + c + // arm:`MULA`,-`MUL\s` + // arm64:`MADDW`,-`MULW` + r1 := c*79 + a + // arm:`ADD`,-`MULA`,-`MUL\s` + // arm64:`ADD`,-`MADD`,-`MULW` + // ppc64:`ADD`,-`MULLD` + // ppc64le:`ADD`,-`MULLD` + r2 := b*64 + c + return r0, r1, r2 +} + +func MULS(a, b, c uint32) (uint32, uint32, uint32) { + // arm/7:`MULS`,-`MUL\s` + // arm/6:`SUB`,`MUL\s`,-`MULS` + // arm64:`MSUBW`,-`MULW` + r0 := c - a*b + // arm/7:`MULS`,-`MUL\s` + // arm/6:`SUB`,`MUL\s`,-`MULS` + // arm64:`MSUBW`,-`MULW` + r1 := a - c*79 + // arm/7:`SUB`,-`MULS`,-`MUL\s` + // arm64:`SUB`,-`MSUBW`,-`MULW` + // ppc64:`SUB`,-`MULLD` + // ppc64le:`SUB`,-`MULLD` + r2 := c - b*64 + return r0, r1, r2 +} + +func addSpecial(a, b, c uint32) (uint32, uint32, uint32) { + // amd64:`INCL` + a++ + // amd64:`DECL` + b-- + // amd64:`SUBL.*-128` + c += 128 + return a, b, c +} + +// Divide -> shift rules usually require fixup for negative inputs. +// If the input is non-negative, make sure the fixup is eliminated. 
+func divInt(v int64) int64 { + if v < 0 { + return 0 + } + // amd64:-`.*SARQ.*63,`, -".*SHRQ", ".*SARQ.*[$]9," + return v / 512 +} + +// The reassociate rules "x - (z + C) -> (x - z) - C" and +// "(z + C) -x -> C + (z - x)" can optimize the following cases. +func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) { + // arm64:"SUB","ADD\t[$]2" + // ppc64:"SUB","ADD\t[$]2" + // ppc64le:"SUB","ADD\t[$]2" + r0 := (i0 + 3) - (j0 + 1) + // arm64:"SUB","SUB\t[$]4" + // ppc64:"SUB","ADD\t[$]-4" + // ppc64le:"SUB","ADD\t[$]-4" + r1 := (i1 - 3) - (j1 + 1) + // arm64:"SUB","ADD\t[$]4" + // ppc64:"SUB","ADD\t[$]4" + // ppc64le:"SUB","ADD\t[$]4" + r2 := (i2 + 3) - (j2 - 1) + // arm64:"SUB","SUB\t[$]2" + // ppc64:"SUB","ADD\t[$]-2" + // ppc64le:"SUB","ADD\t[$]-2" + r3 := (i3 - 3) - (j3 - 1) + return r0, r1, r2, r3 +} + +// The reassociate rules "x - (z + C) -> (x - z) - C" and +// "(C - z) - x -> C - (z + x)" can optimize the following cases. +func constantFold2(i0, j0, i1, j1 int) (int, int) { + // arm64:"ADD","MOVD\t[$]2","SUB" + // ppc64le: `SUBC\tR[0-9]+,\s[$]2,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]2,\sR` + r0 := (3 - i0) - (j0 + 1) + // arm64:"ADD","MOVD\t[$]4","SUB" + // ppc64le: `SUBC\tR[0-9]+,\s[$]4,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]4,\sR` + r1 := (3 - i1) - (j1 - 1) + return r0, r1 +} + +func constantFold3(i, j int) int { + // arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL" + // ppc64:"MULLD\t[$]30","MULLD" + // ppc64le:"MULLD\t[$]30","MULLD" + r := (5 * i) * (6 * j) + return r +} diff --git a/test/codegen/bitfield.go b/test/codegen/bitfield.go new file mode 100644 index 0000000..3276af3 --- /dev/null +++ b/test/codegen/bitfield.go @@ -0,0 +1,368 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +// This file contains codegen tests related to bit field +// insertion/extraction simplifications/optimizations. + +func extr1(x, x2 uint64) uint64 { + return x<<7 + x2>>57 // arm64:"EXTR\t[$]57," +} + +func extr2(x, x2 uint64) uint64 { + return x<<7 | x2>>57 // arm64:"EXTR\t[$]57," +} + +func extr3(x, x2 uint64) uint64 { + return x<<7 ^ x2>>57 // arm64:"EXTR\t[$]57," +} + +func extr4(x, x2 uint32) uint32 { + return x<<7 + x2>>25 // arm64:"EXTRW\t[$]25," +} + +func extr5(x, x2 uint32) uint32 { + return x<<7 | x2>>25 // arm64:"EXTRW\t[$]25," +} + +func extr6(x, x2 uint32) uint32 { + return x<<7 ^ x2>>25 // arm64:"EXTRW\t[$]25," +} + +// check 32-bit shift masking +func mask32(x uint32) uint32 { + return (x << 29) >> 29 // arm64:"AND\t[$]7, R[0-9]+",-"LSR",-"LSL" +} + +// check 16-bit shift masking +func mask16(x uint16) uint16 { + return (x << 14) >> 14 // arm64:"AND\t[$]3, R[0-9]+",-"LSR",-"LSL" +} + +// check 8-bit shift masking +func mask8(x uint8) uint8 { + return (x << 7) >> 7 // arm64:"AND\t[$]1, R[0-9]+",-"LSR",-"LSL" +} + +func maskshift(x uint64) uint64 { + // arm64:"AND\t[$]4095, R[0-9]+",-"LSL",-"LSR",-"UBFIZ",-"UBFX" + return ((x << 5) & (0xfff << 5)) >> 5 +} + +// bitfield ops +// bfi +func bfi1(x, y uint64) uint64 { + // arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f) +} + +func bfi2(x, y uint64) uint64 { + // arm64:"BFI\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND" + return (x << 24 >> 12) | (y & 0xfff0000000000fff) +} + +// bfxil +func bfxil1(x, y uint64) uint64 { + // arm64:"BFXIL\t[$]5, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + return ((x >> 5) & 0xfff) | (y & 0xfffffffffffff000) +} + +func bfxil2(x, y uint64) uint64 { + // arm64:"BFXIL\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND" + return (x << 12 >> 24) | (y & 0xffffff0000000000) +} + +// sbfiz +// merge shifts into sbfiz: (x << lc) >> rc && lc > rc. 
+func sbfiz1(x int64) int64 { + // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR" + return (x << 4) >> 3 +} + +// merge shift and sign-extension into sbfiz. +func sbfiz2(x int32) int64 { + return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]29",-"LSL" +} + +func sbfiz3(x int16) int64 { + return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]13",-"LSL" +} + +func sbfiz4(x int8) int64 { + return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]5",-"LSL" +} + +// sbfiz combinations. +// merge shift with sbfiz into sbfiz. +func sbfiz5(x int32) int32 { + // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR" + return (x << 4) >> 3 +} + +func sbfiz6(x int16) int64 { + return int64(x+1) << 3 // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL" +} + +func sbfiz7(x int8) int64 { + return int64(x+1) << 62 // arm64:"SBFIZ\t[$]62, R[0-9]+, [$]2",-"LSL" +} + +func sbfiz8(x int32) int64 { + return int64(x+1) << 40 // arm64:"SBFIZ\t[$]40, R[0-9]+, [$]24",-"LSL" +} + +// sbfx +// merge shifts into sbfx: (x << lc) >> rc && lc <= rc. +func sbfx1(x int64) int64 { + return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR" +} + +func sbfx2(x int64) int64 { + return (x << 60) >> 60 // arm64:"SBFX\t[$]0, R[0-9]+, [$]4",-"LSL",-"ASR" +} + +// merge shift and sign-extension into sbfx. +func sbfx3(x int32) int64 { + return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]29",-"ASR" +} + +func sbfx4(x int16) int64 { + return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]13",-"ASR" +} + +func sbfx5(x int8) int64 { + return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]5",-"ASR" +} + +func sbfx6(x int32) int64 { + return int64(x >> 30) // arm64:"SBFX\t[$]30, R[0-9]+, [$]2" +} + +func sbfx7(x int16) int64 { + return int64(x >> 10) // arm64:"SBFX\t[$]10, R[0-9]+, [$]6" +} + +func sbfx8(x int8) int64 { + return int64(x >> 5) // arm64:"SBFX\t[$]5, R[0-9]+, [$]3" +} + +// sbfx combinations. +// merge shifts with sbfiz into sbfx. 
+func sbfx9(x int32) int32 { + return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR" +} + +// merge sbfx and sign-extension into sbfx. +func sbfx10(x int32) int64 { + c := x + 5 + return int64(c >> 20) // arm64"SBFX\t[$]20, R[0-9]+, [$]12",-"MOVW\tR[0-9]+, R[0-9]+" +} + +// ubfiz +// merge shifts into ubfiz: (x<<lc)>>rc && lc>rc +func ubfiz1(x uint64) uint64 { + // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD" + return (x << 4) >> 3 +} + +// merge shift and zero-extension into ubfiz. +func ubfiz2(x uint32) uint64 { + return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]32",-"LSL" +} + +func ubfiz3(x uint16) uint64 { + return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL" +} + +func ubfiz4(x uint8) uint64 { + return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]8",-"LSL" +} + +func ubfiz5(x uint8) uint64 { + return uint64(x) << 60 // arm64:"UBFIZ\t[$]60, R[0-9]+, [$]4",-"LSL" +} + +func ubfiz6(x uint32) uint64 { + return uint64(x << 30) // arm64:"UBFIZ\t[$]30, R[0-9]+, [$]2", +} + +func ubfiz7(x uint16) uint64 { + return uint64(x << 10) // arm64:"UBFIZ\t[$]10, R[0-9]+, [$]6", +} + +func ubfiz8(x uint8) uint64 { + return uint64(x << 7) // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]1", +} + +// merge ANDconst into ubfiz. 
+func ubfiz9(x uint64) uint64 { + // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND" + // s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND" + return (x & 0xfff) << 3 +} + +func ubfiz10(x uint64) uint64 { + // arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND" + // s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND" + return (x << 4) & 0xfff0 +} + +// ubfiz combinations +func ubfiz11(x uint32) uint32 { + // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"LSR" + return (x << 4) >> 3 +} + +func ubfiz12(x uint64) uint64 { + // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND" + return ((x & 0xfffff) << 4) >> 3 +} + +func ubfiz13(x uint64) uint64 { + // arm64:"UBFIZ\t[$]5, R[0-9]+, [$]13",-"LSL",-"LSR",-"AND" + return ((x << 3) & 0xffff) << 2 +} + +func ubfiz14(x uint64) uint64 { + // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND" + return ((x << 5) & (0xfff << 5)) << 2 +} + +// ubfx +// merge shifts into ubfx: (x<<lc)>>rc && lc<rc +func ubfx1(x uint64) uint64 { + // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD" + return (x << 1) >> 2 +} + +// merge shift and zero-extension into ubfx. +func ubfx2(x uint32) uint64 { + return uint64(x >> 15) // arm64:"UBFX\t[$]15, R[0-9]+, [$]17",-"LSR" +} + +func ubfx3(x uint16) uint64 { + return uint64(x >> 9) // arm64:"UBFX\t[$]9, R[0-9]+, [$]7",-"LSR" +} + +func ubfx4(x uint8) uint64 { + return uint64(x >> 3) // arm64:"UBFX\t[$]3, R[0-9]+, [$]5",-"LSR" +} + +func ubfx5(x uint32) uint64 { + return uint64(x) >> 30 // arm64:"UBFX\t[$]30, R[0-9]+, [$]2" +} + +func ubfx6(x uint16) uint64 { + return uint64(x) >> 10 // arm64:"UBFX\t[$]10, R[0-9]+, [$]6" +} + +func ubfx7(x uint8) uint64 { + return uint64(x) >> 3 // arm64:"UBFX\t[$]3, R[0-9]+, [$]5" +} + +// merge ANDconst into ubfx. 
+func ubfx8(x uint64) uint64 { + // arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND" + return (x >> 25) & 1023 +} + +func ubfx9(x uint64) uint64 { + // arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND" + return (x & 0x0ff0) >> 4 +} + +// ubfx combinations. +func ubfx10(x uint32) uint32 { + // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR" + return (x << 1) >> 2 +} + +func ubfx11(x uint64) uint64 { + // arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND" + return ((x << 1) >> 2) & 0xfff +} + +func ubfx12(x uint64) uint64 { + // arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND" + // s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND" + return ((x >> 3) & 0xfff) >> 1 +} + +func ubfx13(x uint64) uint64 { + // arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD" + return ((x >> 2) << 5) >> 8 +} + +func ubfx14(x uint64) uint64 { + // arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR" + // s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND" + return ((x & 0xfffff) << 3) >> 4 +} + +// merge ubfx and zero-extension into ubfx. +func ubfx15(x uint64) bool { + midr := x + 10 + part_num := uint16((midr >> 4) & 0xfff) + if part_num == 0xd0c { // arm64:"UBFX\t[$]4, R[0-9]+, [$]12",-"MOVHU\tR[0-9]+, R[0-9]+" + return true + } + return false +} + +// merge ANDconst and ubfx into ubfx +func ubfx16(x uint64) uint64 { + // arm64:"UBFX\t[$]4, R[0-9]+, [$]6",-"AND\t[$]63" + return ((x >> 3) & 0xfff) >> 1 & 0x3f +} + +// Check that we don't emit comparisons for constant shifts. 
+// +//go:nosplit +func shift_no_cmp(x int) int { + // arm64:`LSL\t[$]17`,-`CMP` + // mips64:`SLLV\t[$]17`,-`SGT` + return x << 17 +} + +func rev16(c uint64) (uint64, uint64, uint64) { + // arm64:`REV16`,-`AND`,-`LSR`,-`AND`,-`ORR\tR[0-9]+<<8` + b1 := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8) + // arm64:-`ADD\tR[0-9]+<<8` + b2 := ((c & 0xff00ff00ff00ff00) >> 8) + ((c & 0x00ff00ff00ff00ff) << 8) + // arm64:-`EOR\tR[0-9]+<<8` + b3 := ((c & 0xff00ff00ff00ff00) >> 8) ^ ((c & 0x00ff00ff00ff00ff) << 8) + return b1, b2, b3 +} + +func rev16w(c uint32) (uint32, uint32, uint32) { + // arm64:`REV16W`,-`AND`,-`UBFX`,-`AND`,-`ORR\tR[0-9]+<<8` + b1 := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8) + // arm64:-`ADD\tR[0-9]+<<8` + b2 := ((c & 0xff00ff00) >> 8) + ((c & 0x00ff00ff) << 8) + // arm64:-`EOR\tR[0-9]+<<8` + b3 := ((c & 0xff00ff00) >> 8) ^ ((c & 0x00ff00ff) << 8) + return b1, b2, b3 +} + +func shift(x uint32, y uint16, z uint8) uint64 { + // arm64:-`MOVWU`,-`LSR\t[$]32` + a := uint64(x) >> 32 + // arm64:-`MOVHU` + b := uint64(y) >> 16 + // arm64:-`MOVBU` + c := uint64(z) >> 8 + // arm64:`MOVD\tZR`,-`ADD\tR[0-9]+>>16`,-`ADD\tR[0-9]+>>8` + return a + b + c +} diff --git a/test/codegen/bits.go b/test/codegen/bits.go new file mode 100644 index 0000000..cd41392 --- /dev/null +++ b/test/codegen/bits.go @@ -0,0 +1,400 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +import "math/bits" + +/************************************ + * 64-bit instructions + ************************************/ + +func bitcheck64_constleft(a uint64) (n int) { + // amd64:"BTQ\t[$]63" + if a&(1<<63) != 0 { + return 1 + } + // amd64:"BTQ\t[$]60" + if a&(1<<60) != 0 { + return 1 + } + // amd64:"BTL\t[$]0" + if a&(1<<0) != 0 { + return 1 + } + return 0 +} + +func bitcheck64_constright(a [8]uint64) (n int) { + // amd64:"BTQ\t[$]63" + if (a[0]>>63)&1 != 0 { + return 1 + } + // amd64:"BTQ\t[$]63" + if a[1]>>63 != 0 { + return 1 + } + // amd64:"BTQ\t[$]63" + if a[2]>>63 == 0 { + return 1 + } + // amd64:"BTQ\t[$]60" + if (a[3]>>60)&1 == 0 { + return 1 + } + // amd64:"BTL\t[$]1" + if (a[4]>>1)&1 == 0 { + return 1 + } + // amd64:"BTL\t[$]0" + if (a[5]>>0)&1 == 0 { + return 1 + } + // amd64:"BTL\t[$]7" + if (a[6]>>5)&4 == 0 { + return 1 + } + return 0 +} + +func bitcheck64_var(a, b uint64) (n int) { + // amd64:"BTQ" + if a&(1<<(b&63)) != 0 { + return 1 + } + // amd64:"BTQ",-"BT.\t[$]0" + if (b>>(a&63))&1 != 0 { + return 1 + } + return 0 +} + +func bitcheck64_mask(a uint64) (n int) { + // amd64:"BTQ\t[$]63" + if a&0x8000000000000000 != 0 { + return 1 + } + // amd64:"BTQ\t[$]59" + if a&0x800000000000000 != 0 { + return 1 + } + // amd64:"BTL\t[$]0" + if a&0x1 != 0 { + return 1 + } + return 0 +} + +func biton64(a, b uint64) (n uint64) { + // amd64:"BTSQ" + n += b | (1 << (a & 63)) + + // amd64:"BTSQ\t[$]63" + n += a | (1 << 63) + + // amd64:"BTSQ\t[$]60" + n += a | (1 << 60) + + // amd64:"ORQ\t[$]1" + n += a | (1 << 0) + + return n +} + +func bitoff64(a, b uint64) (n uint64) { + // amd64:"BTRQ" + n += b &^ (1 << (a & 63)) + + // amd64:"BTRQ\t[$]63" + n += a &^ (1 << 63) + + // amd64:"BTRQ\t[$]60" + n += a &^ (1 << 60) + + // amd64:"ANDQ\t[$]-2" + n += a &^ (1 << 0) + + return n +} + +func bitcompl64(a, b uint64) (n uint64) { + // amd64:"BTCQ" + n += b ^ (1 << (a & 63)) + + // amd64:"BTCQ\t[$]63" + n += a ^ (1 << 63) + + // amd64:"BTCQ\t[$]60" + 
n += a ^ (1 << 60) + + // amd64:"XORQ\t[$]1" + n += a ^ (1 << 0) + + return n +} + +/************************************ + * 32-bit instructions + ************************************/ + +func bitcheck32_constleft(a uint32) (n int) { + // amd64:"BTL\t[$]31" + if a&(1<<31) != 0 { + return 1 + } + // amd64:"BTL\t[$]28" + if a&(1<<28) != 0 { + return 1 + } + // amd64:"BTL\t[$]0" + if a&(1<<0) != 0 { + return 1 + } + return 0 +} + +func bitcheck32_constright(a [8]uint32) (n int) { + // amd64:"BTL\t[$]31" + if (a[0]>>31)&1 != 0 { + return 1 + } + // amd64:"BTL\t[$]31" + if a[1]>>31 != 0 { + return 1 + } + // amd64:"BTL\t[$]31" + if a[2]>>31 == 0 { + return 1 + } + // amd64:"BTL\t[$]28" + if (a[3]>>28)&1 == 0 { + return 1 + } + // amd64:"BTL\t[$]1" + if (a[4]>>1)&1 == 0 { + return 1 + } + // amd64:"BTL\t[$]0" + if (a[5]>>0)&1 == 0 { + return 1 + } + // amd64:"BTL\t[$]7" + if (a[6]>>5)&4 == 0 { + return 1 + } + return 0 +} + +func bitcheck32_var(a, b uint32) (n int) { + // amd64:"BTL" + if a&(1<<(b&31)) != 0 { + return 1 + } + // amd64:"BTL",-"BT.\t[$]0" + if (b>>(a&31))&1 != 0 { + return 1 + } + return 0 +} + +func bitcheck32_mask(a uint32) (n int) { + // amd64:"BTL\t[$]31" + if a&0x80000000 != 0 { + return 1 + } + // amd64:"BTL\t[$]27" + if a&0x8000000 != 0 { + return 1 + } + // amd64:"BTL\t[$]0" + if a&0x1 != 0 { + return 1 + } + return 0 +} + +func biton32(a, b uint32) (n uint32) { + // amd64:"BTSL" + n += b | (1 << (a & 31)) + + // amd64:"BTSL\t[$]31" + n += a | (1 << 31) + + // amd64:"BTSL\t[$]28" + n += a | (1 << 28) + + // amd64:"ORL\t[$]1" + n += a | (1 << 0) + + return n +} + +func bitoff32(a, b uint32) (n uint32) { + // amd64:"BTRL" + n += b &^ (1 << (a & 31)) + + // amd64:"BTRL\t[$]31" + n += a &^ (1 << 31) + + // amd64:"BTRL\t[$]28" + n += a &^ (1 << 28) + + // amd64:"ANDL\t[$]-2" + n += a &^ (1 << 0) + + return n +} + +func bitcompl32(a, b uint32) (n uint32) { + // amd64:"BTCL" + n += b ^ (1 << (a & 31)) + + // amd64:"BTCL\t[$]31" + n += a ^ (1 << 31) + + 
// amd64:"BTCL\t[$]28" + n += a ^ (1 << 28) + + // amd64:"XORL\t[$]1" + n += a ^ (1 << 0) + + return n +} + +// check direct operation on memory with constant and shifted constant sources +func bitOpOnMem(a []uint32, b, c, d uint32) { + // amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)` + a[0] &= 200 + // amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)` + a[1] |= 220 + // amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)` + a[2] ^= 240 +} + +func bitcheckMostNegative(b uint8) bool { + // amd64:"TESTB" + return b&0x80 == 0x80 +} + +// Check AND masking on arm64 (Issue #19857) + +func and_mask_1(a uint64) uint64 { + // arm64:`AND\t` + return a & ((1 << 63) - 1) +} + +func and_mask_2(a uint64) uint64 { + // arm64:`AND\t` + return a & (1 << 63) +} + +func and_mask_3(a, b uint32) (uint32, uint32) { + // arm/7:`BIC`,-`AND` + a &= 0xffffaaaa + // arm/7:`BFC`,-`AND`,-`BIC` + b &= 0xffc003ff + return a, b +} + +// Check generation of arm64 BIC/EON/ORN instructions + +func op_bic(x, y uint32) uint32 { + // arm64:`BIC\t`,-`AND` + return x &^ y +} + +func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 { + // arm64:`EON\t`,-`EOR`,-`MVN` + a[0] = x ^ (y ^ 0xffffffff) + + // arm64:`EON\t`,-`EOR`,-`MVN` + a[1] = ^(y ^ z) + + // arm64:`EON\t`,-`XOR` + a[2] = x ^ ^z + + // arm64:`EON\t`,-`EOR`,-`MVN` + return n ^ (m ^ 0xffffffffffffffff) +} + +func op_orn(x, y uint32) uint32 { + // arm64:`ORN\t`,-`ORR` + return x | ^y +} + +// check bitsets +func bitSetPowerOf2Test(x int) bool { + // amd64:"BTL\t[$]3" + return x&8 == 8 +} + +func bitSetTest(x int) bool { + // amd64:"ANDL\t[$]9, AX" + // amd64:"CMPQ\tAX, [$]9" + return x&9 == 9 +} + +// mask contiguous one bits +func cont1Mask64U(x uint64) uint64 { + // s390x:"RISBGZ\t[$]16, [$]47, [$]0," + return x & 0x0000ffffffff0000 +} + +// mask contiguous zero bits +func cont0Mask64U(x uint64) uint64 { + // s390x:"RISBGZ\t[$]48, [$]15, [$]0," + return x & 0xffff00000000ffff +} + +func issue44228a(a []int64, i int) bool { + // amd64: "BTQ", -"SHL" + return 
a[i>>6]&(1<<(i&63)) != 0 +} +func issue44228b(a []int32, i int) bool { + // amd64: "BTL", -"SHL" + return a[i>>5]&(1<<(i&31)) != 0 +} + +func issue48467(x, y uint64) uint64 { + // arm64: -"NEG" + d, borrow := bits.Sub64(x, y, 0) + return x - d&(-borrow) +} + +func foldConst(x, y uint64) uint64 { + // arm64: "ADDS\t[$]7",-"MOVD\t[$]7" + d, b := bits.Add64(x, 7, 0) + return b & d +} + +func foldConstOutOfRange(a uint64) uint64 { + // arm64: "MOVD\t[$]19088744",-"ADD\t[$]19088744" + return a + 0x1234568 +} + +// Verify sign-extended values are not zero-extended under a bit mask (#61297) +func signextendAndMask8to64(a int8) (s, z uint64) { + // ppc64: "MOVB", "ANDCC\t[$]1015," + // ppc64le: "MOVB", "ANDCC\t[$]1015," + s = uint64(a) & 0x3F7 + // ppc64: -"MOVB", "ANDCC\t[$]247," + // ppc64le: -"MOVB", "ANDCC\t[$]247," + z = uint64(uint8(a)) & 0x3F7 + return + +} + +// Verify zero-extended values are not sign-extended under a bit mask (#61297) +func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) { + // ppc64: -"MOVB\t", -"ANDCC", "MOVBZ" + // ppc64le: -"MOVB\t", -"ANDCC", "MOVBZ" + x = uint64(a) & 0xFF + // ppc64: -"MOVH\t", -"ANDCC", "MOVHZ" + // ppc64le: -"MOVH\t", -"ANDCC", "MOVHZ" + y = uint64(b) & 0xFFFF + return + +} diff --git a/test/codegen/bmi.go b/test/codegen/bmi.go new file mode 100644 index 0000000..3b125a1 --- /dev/null +++ b/test/codegen/bmi.go @@ -0,0 +1,105 @@ +// asmcheck + +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +func andn64(x, y int64) int64 { + // amd64/v3:"ANDNQ" + return x &^ y +} + +func andn32(x, y int32) int32 { + // amd64/v3:"ANDNL" + return x &^ y +} + +func blsi64(x int64) int64 { + // amd64/v3:"BLSIQ" + return x & -x +} + +func blsi32(x int32) int32 { + // amd64/v3:"BLSIL" + return x & -x +} + +func blsmsk64(x int64) int64 { + // amd64/v3:"BLSMSKQ" + return x ^ (x - 1) +} + +func blsmsk32(x int32) int32 { + // amd64/v3:"BLSMSKL" + return x ^ (x - 1) +} + +func blsr64(x int64) int64 { + // amd64/v3:"BLSRQ" + return x & (x - 1) +} + +func blsr32(x int32) int32 { + // amd64/v3:"BLSRL" + return x & (x - 1) +} + +func sarx64(x, y int64) int64 { + // amd64/v3:"SARXQ" + return x >> y +} + +func sarx32(x, y int32) int32 { + // amd64/v3:"SARXL" + return x >> y +} + +func sarx64_load(x []int64, i int) int64 { + // amd64/v3: `SARXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s := x[i] >> (i & 63) + // amd64/v3: `SARXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s = x[i+1] >> (s & 63) + return s +} + +func sarx32_load(x []int32, i int) int32 { + // amd64/v3: `SARXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s := x[i] >> (i & 63) + // amd64/v3: `SARXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s = x[i+1] >> (s & 63) + return s +} + +func shlrx64(x, y uint64) uint64 { + // amd64/v3:"SHRXQ" + s := x >> y + // amd64/v3:"SHLXQ" + s = s << y + return s +} + +func shlrx32(x, y uint32) uint32 { + // amd64/v3:"SHRXL" + s := x >> y + // amd64/v3:"SHLXL" + s = s << y + return s +} + +func shlrx64_load(x []uint64, i int, s uint64) uint64 { + // amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s = x[i] >> i + // amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s = x[i+1] << s + return s +} + +func shlrx32_load(x []uint32, i int, s uint32) uint32 { + // amd64/v3: `SHRXL\t[A-Z]+[0-9]*, 
\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s = x[i] >> i + // amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s = x[i+1] << s + return s +} diff --git a/test/codegen/bool.go b/test/codegen/bool.go new file mode 100644 index 0000000..d921b55 --- /dev/null +++ b/test/codegen/bool.go @@ -0,0 +1,73 @@ +// asmcheck + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// This file contains codegen tests related to boolean simplifications/optimizations. + +func convertNeq0B(x uint8, c bool) bool { + // amd64:"ANDL\t[$]1",-"SETNE" + // ppc64:"ANDCC",-"CMPW",-"ISEL" + // ppc64le:"ANDCC",-"CMPW",-"ISEL" + // ppc64le/power9:"ANDCC",-"CMPW",-"ISEL" + b := x&1 != 0 + return c && b +} + +func convertNeq0W(x uint16, c bool) bool { + // amd64:"ANDL\t[$]1",-"SETNE" + // ppc64:"ANDCC",-"CMPW",-"ISEL" + // ppc64le:"ANDCC",-"CMPW",-"ISEL" + // ppc64le/power9:"ANDCC",-"CMPW",-"ISEL" + b := x&1 != 0 + return c && b +} + +func convertNeq0L(x uint32, c bool) bool { + // amd64:"ANDL\t[$]1",-"SETB" + // ppc64:"ANDCC",-"CMPW",-"ISEL" + // ppc64le:"ANDCC",-"CMPW",-"ISEL" + // ppc64le/power9:"ANDCC",-"CMPW",-"ISEL" + b := x&1 != 0 + return c && b +} + +func convertNeq0Q(x uint64, c bool) bool { + // amd64:"ANDL\t[$]1",-"SETB" + // ppc64:"ANDCC",-"CMP",-"ISEL" + // ppc64le:"ANDCC",-"CMP",-"ISEL" + // ppc64le/power9:"ANDCC",-"CMP",-"ISEL" + b := x&1 != 0 + return c && b +} + +func convertNeqBool32(x uint32) bool { + // ppc64:"ANDCC",-"CMPW",-"ISEL" + // ppc64le:"ANDCC",-"CMPW",-"ISEL" + // ppc64le/power9:"ANDCC",-"CMPW",-"ISEL" + return x&1 != 0 +} + +func convertEqBool32(x uint32) bool { + // ppc64:"ANDCC",-"CMPW","XOR",-"ISEL" + // ppc64le:"ANDCC",-"CMPW","XOR",-"ISEL" + // ppc64le/power9:"ANDCC","XOR",-"CMPW",-"ISEL" + return x&1 == 0 +} + +func convertNeqBool64(x uint64) bool { + // ppc64:"ANDCC",-"CMP",-"ISEL" + 
// ppc64le:"ANDCC",-"CMP",-"ISEL" + // ppc64le/power9:"ANDCC",-"CMP",-"ISEL" + return x&1 != 0 +} + +func convertEqBool64(x uint64) bool { + // ppc64:"ANDCC","XOR",-"CMP",-"ISEL" + // ppc64le:"ANDCC","XOR",-"CMP",-"ISEL" + // ppc64le/power9:"ANDCC","XOR",-"CMP",-"ISEL" + return x&1 == 0 +} diff --git a/test/codegen/clobberdead.go b/test/codegen/clobberdead.go new file mode 100644 index 0000000..732be5f --- /dev/null +++ b/test/codegen/clobberdead.go @@ -0,0 +1,35 @@ +// asmcheck -gcflags=-clobberdead + +// +build amd64 arm64 + +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +type T [2]*int // contain pointer, not SSA-able (so locals are not registerized) + +var p1, p2, p3 T + +func F() { + // 3735936685 is 0xdeaddead. On ARM64 R27 is REGTMP. + // clobber x, y at entry. not clobber z (stack object). + // amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, `MOVL\t\$3735936685, command-line-arguments\.y`, -`MOVL\t\$3735936685, command-line-arguments\.z` + // arm64:`MOVW\tR27, command-line-arguments\.x`, `MOVW\tR27, command-line-arguments\.y`, -`MOVW\tR27, command-line-arguments\.z` + x, y, z := p1, p2, p3 + addrTaken(&z) + // x is dead at the call (the value of x is loaded before the CALL), y is not + // amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, -`MOVL\t\$3735936685, command-line-arguments\.y` + // arm64:`MOVW\tR27, command-line-arguments\.x`, -`MOVW\tR27, command-line-arguments\.y` + use(x) + // amd64:`MOVL\t\$3735936685, command-line-arguments\.x`, `MOVL\t\$3735936685, command-line-arguments\.y` + // arm64:`MOVW\tR27, command-line-arguments\.x`, `MOVW\tR27, command-line-arguments\.y` + use(y) +} + +//go:noinline +func use(T) {} + +//go:noinline +func addrTaken(*T) {} diff --git a/test/codegen/clobberdeadreg.go b/test/codegen/clobberdeadreg.go new file mode 100644 index 0000000..2a93c41 --- /dev/null +++ 
b/test/codegen/clobberdeadreg.go @@ -0,0 +1,33 @@ +// asmcheck -gcflags=-clobberdeadreg + +// +build amd64 + +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +type S struct { + a, b, c, d, e, f int +} + +func F(a, b, c int, d S) { + // -2401018187971961171 is 0xdeaddeaddeaddead + // amd64:`MOVQ\t\$-2401018187971961171, AX`, `MOVQ\t\$-2401018187971961171, BX`, `MOVQ\t\$-2401018187971961171, CX` + // amd64:`MOVQ\t\$-2401018187971961171, DX`, `MOVQ\t\$-2401018187971961171, SI`, `MOVQ\t\$-2401018187971961171, DI` + // amd64:`MOVQ\t\$-2401018187971961171, R8`, `MOVQ\t\$-2401018187971961171, R9`, `MOVQ\t\$-2401018187971961171, R10` + // amd64:`MOVQ\t\$-2401018187971961171, R11`, `MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13` + // amd64:-`MOVQ\t\$-2401018187971961171, BP` // frame pointer is not clobbered + StackArgsCall([10]int{a, b, c}) + // amd64:`MOVQ\t\$-2401018187971961171, R12`, `MOVQ\t\$-2401018187971961171, R13`, `MOVQ\t\$-2401018187971961171, DX` + // amd64:-`MOVQ\t\$-2401018187971961171, AX`, -`MOVQ\t\$-2401018187971961171, R11` // register args are not clobbered + RegArgsCall(a, b, c, d) +} + +//go:noinline +func StackArgsCall([10]int) {} + +//go:noinline +//go:registerparams +func RegArgsCall(int, int, int, S) {} diff --git a/test/codegen/compare_and_branch.go b/test/codegen/compare_and_branch.go new file mode 100644 index 0000000..f751506 --- /dev/null +++ b/test/codegen/compare_and_branch.go @@ -0,0 +1,206 @@ +// asmcheck + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +//go:noinline +func dummy() {} + +// Signed 64-bit compare-and-branch. 
+func si64(x, y chan int64) { + // s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, " + for <-x < <-y { + dummy() + } + + // s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, " + for <-x == <-y { + dummy() + } +} + +// Signed 64-bit compare-and-branch with 8-bit immediate. +func si64x8() { + // s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, " + for i := int64(0); i < 128; i++ { + dummy() + } + + // s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, " + for i := int64(0); i > -129; i-- { + dummy() + } + + // s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, " + for i := int64(0); i >= 128; i++ { + dummy() + } + + // s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, " + for i := int64(0); i <= -129; i-- { + dummy() + } +} + +// Unsigned 64-bit compare-and-branch. +func ui64(x, y chan uint64) { + // s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, " + for <-x > <-y { + dummy() + } + + // s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, " + for <-x != <-y { + dummy() + } +} + +// Unsigned 64-bit comparison with 8-bit immediate. +func ui64x8() { + // s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, " + for i := uint64(0); i < 128; i++ { + dummy() + } + + // s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, " + for i := uint64(0); i < 256; i++ { + dummy() + } + + // s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, " + for i := uint64(0); i >= 256; i-- { + dummy() + } + + // s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, " + for i := uint64(1024); i > 0; i-- { + dummy() + } +} + +// Signed 32-bit compare-and-branch. +func si32(x, y chan int32) { + // s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, " + for <-x < <-y { + dummy() + } + + // s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, " + for <-x == <-y { + dummy() + } +} + +// Signed 32-bit compare-and-branch with 8-bit immediate. 
+func si32x8() { + // s390x:"CIJ\t[$]12, R[0-9]+, [$]127, " + for i := int32(0); i < 128; i++ { + dummy() + } + + // s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, " + for i := int32(0); i > -129; i-- { + dummy() + } + + // s390x:"CIJ\t[$]2, R[0-9]+, [$]127, " + for i := int32(0); i >= 128; i++ { + dummy() + } + + // s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, " + for i := int32(0); i <= -129; i-- { + dummy() + } +} + +// Unsigned 32-bit compare-and-branch. +func ui32(x, y chan uint32) { + // s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, " + for <-x > <-y { + dummy() + } + + // s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, " + for <-x != <-y { + dummy() + } +} + +// Unsigned 32-bit comparison with 8-bit immediate. +func ui32x8() { + // s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, " + for i := uint32(0); i < 128; i++ { + dummy() + } + + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, " + for i := uint32(0); i < 256; i++ { + dummy() + } + + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, " + for i := uint32(0); i >= 256; i-- { + dummy() + } + + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, " + for i := uint32(1024); i > 0; i-- { + dummy() + } +} + +// Signed 64-bit comparison with unsigned 8-bit immediate. +func si64xu8(x chan int64) { + // s390x:"CLGIJ\t[$]8, R[0-9]+, [$]128, " + for <-x == 128 { + dummy() + } + + // s390x:"CLGIJ\t[$]6, R[0-9]+, [$]255, " + for <-x != 255 { + dummy() + } +} + +// Signed 32-bit comparison with unsigned 8-bit immediate. +func si32xu8(x chan int32) { + // s390x:"CLIJ\t[$]8, R[0-9]+, [$]255, " + for <-x == 255 { + dummy() + } + + // s390x:"CLIJ\t[$]6, R[0-9]+, [$]128, " + for <-x != 128 { + dummy() + } +} + +// Unsigned 64-bit comparison with signed 8-bit immediate. +func ui64xu8(x chan uint64) { + // s390x:"CGIJ\t[$]8, R[0-9]+, [$]-1, " + for <-x == ^uint64(0) { + dummy() + } + + // s390x:"CGIJ\t[$]6, R[0-9]+, [$]-128, " + for <-x != ^uint64(127) { + dummy() + } +} + +// Unsigned 32-bit comparison with signed 8-bit immediate. 
+func ui32xu8(x chan uint32) { + // s390x:"CIJ\t[$]8, R[0-9]+, [$]-128, " + for <-x == ^uint32(127) { + dummy() + } + + // s390x:"CIJ\t[$]6, R[0-9]+, [$]-1, " + for <-x != ^uint32(0) { + dummy() + } +} diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go new file mode 100644 index 0000000..5a54a96 --- /dev/null +++ b/test/codegen/comparisons.go @@ -0,0 +1,716 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +import "unsafe" + +// This file contains code generation tests related to the comparison +// operators. + +// -------------- // +// Equality // +// -------------- // + +// Check that compare to constant string use 2/4/8 byte compares + +func CompareString1(s string) bool { + // amd64:`CMPW\t\(.*\), [$]` + // arm64:`MOVHU\t\(.*\), [R]`,`MOVD\t[$]`,`CMPW\tR` + // ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]` + // s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]` + return s == "xx" +} + +func CompareString2(s string) bool { + // amd64:`CMPL\t\(.*\), [$]` + // arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]` + // ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]` + // s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [$]` + return s == "xxxx" +} + +func CompareString3(s string) bool { + // amd64:`CMPQ\t\(.*\), [A-Z]` + // arm64:-`CMPW\t` + // ppc64:-`CMPW\t` + // ppc64le:-`CMPW\t` + // s390x:-`CMPW\t` + return s == "xxxxxxxx" +} + +// Check that arrays compare use 2/4/8 byte compares + +func CompareArray1(a, b [2]byte) bool { + // amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // arm64:-`MOVBU\t` + // ppc64le:-`MOVBZ\t` + // s390x:-`MOVBZ\t` + return a == b +} + +func CompareArray2(a, b [3]uint16) bool { + // amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + return a == b +} + +func CompareArray3(a, b [3]int16) bool { + // 
amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:`CMPW\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + return a == b +} + +func CompareArray4(a, b [12]int8) bool { + // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:`CMPL\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + return a == b +} + +func CompareArray5(a, b [15]byte) bool { + // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + return a == b +} + +// This was a TODO in mapaccess1_faststr +func CompareArray6(a, b unsafe.Pointer) bool { + // amd64:`CMPL\t\(.*\), [A-Z]` + // arm64:`MOVWU\t\(.*\), [R]`,`CMPW\t.*, [R]` + // ppc64le:`MOVWZ\t\(.*\), [R]`,`CMPW\t.*, [R]` + // s390x:`MOVWBR\t\(.*\), [R]`,`CMPW\t.*, [R]` + return *((*[4]byte)(a)) != *((*[4]byte)(b)) +} + +// Check that some structs generate 2/4/8 byte compares. + +type T1 struct { + a [8]byte +} + +func CompareStruct1(s1, s2 T1) bool { + // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:-`CALL` + return s1 == s2 +} + +type T2 struct { + a [16]byte +} + +func CompareStruct2(s1, s2 T2) bool { + // amd64:`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:-`CALL` + return s1 == s2 +} + +// Assert that a memequal call is still generated when +// inlining would increase binary size too much. 
+ +type T3 struct { + a [24]byte +} + +func CompareStruct3(s1, s2 T3) bool { + // amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:`CALL` + return s1 == s2 +} + +type T4 struct { + a [32]byte +} + +func CompareStruct4(s1, s2 T4) bool { + // amd64:-`CMPQ\tcommand-line-arguments[.+_a-z0-9]+\(SP\), [A-Z]` + // amd64:`CALL` + return s1 == s2 +} + +// -------------- // +// Ordering // +// -------------- // + +// Test that LEAQ/ADDQconst are folded into SETx ops + +var r bool + +func CmpFold(x uint32) { + // amd64:`SETHI\t.*\(SB\)` + r = x > 4 +} + +// Test that direct comparisons with memory are generated when +// possible + +func CmpMem1(p int, q *int) bool { + // amd64:`CMPQ\t\(.*\), [A-Z]` + return p < *q +} + +func CmpMem2(p *int, q int) bool { + // amd64:`CMPQ\t\(.*\), [A-Z]` + return *p < q +} + +func CmpMem3(p *int) bool { + // amd64:`CMPQ\t\(.*\), [$]7` + return *p < 7 +} + +func CmpMem4(p *int) bool { + // amd64:`CMPQ\t\(.*\), [$]7` + return 7 < *p +} + +func CmpMem5(p **int) { + // amd64:`CMPL\truntime.writeBarrier\(SB\), [$]0` + *p = nil +} + +func CmpMem6(a []int) int { + // 386:`CMPL\s8\([A-Z]+\),` + // amd64:`CMPQ\s16\([A-Z]+\),` + if a[1] > a[2] { + return 1 + } else { + return 2 + } +} + +// Check tbz/tbnz are generated when comparing against zero on arm64 + +func CmpZero1(a int32, ptr *int) { + if a < 0 { // arm64:"TBZ" + *ptr = 0 + } +} + +func CmpZero2(a int64, ptr *int) { + if a < 0 { // arm64:"TBZ" + *ptr = 0 + } +} + +func CmpZero3(a int32, ptr *int) { + if a >= 0 { // arm64:"TBNZ" + *ptr = 0 + } +} + +func CmpZero4(a int64, ptr *int) { + if a >= 0 { // arm64:"TBNZ" + *ptr = 0 + } +} + +func CmpToZero(a, b, d int32, e, f int64, deOptC0, deOptC1 bool) int32 { + // arm:`TST`,-`AND` + // arm64:`TSTW`,-`AND` + // 386:`TESTL`,-`ANDL` + // amd64:`TESTL`,-`ANDL` + c0 := a&b < 0 + // arm:`CMN`,-`ADD` + // arm64:`CMNW`,-`ADD` + c1 := a+b < 0 + // arm:`TEQ`,-`XOR` + c2 := a^b < 0 + // arm64:`TST`,-`AND` + // amd64:`TESTQ`,-`ANDQ` + 
c3 := e&f < 0 + // arm64:`CMN`,-`ADD` + c4 := e+f < 0 + // not optimized to single CMNW/CMN due to further use of b+d + // arm64:`ADD`,-`CMNW` + // arm:`ADD`,-`CMN` + c5 := b+d == 0 + // not optimized to single TSTW/TST due to further use of a&d + // arm64:`AND`,-`TSTW` + // arm:`AND`,-`TST` + // 386:`ANDL` + c6 := a&d >= 0 + // arm64:`TST\sR[0-9]+<<3,\sR[0-9]+` + c7 := e&(f<<3) < 0 + // arm64:`CMN\sR[0-9]+<<3,\sR[0-9]+` + c8 := e+(f<<3) < 0 + if c0 { + return 1 + } else if c1 { + return 2 + } else if c2 { + return 3 + } else if c3 { + return 4 + } else if c4 { + return 5 + } else if c5 { + return 6 + } else if c6 { + return 7 + } else if c7 { + return 9 + } else if c8 { + return 10 + } else if deOptC0 { + return b + d + } else if deOptC1 { + return a & d + } else { + return 0 + } +} + +func CmpLogicalToZero(a, b, c uint32, d, e uint64) uint64 { + + // ppc64:"ANDCC",-"CMPW" + // ppc64le:"ANDCC",-"CMPW" + // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64" + if a&63 == 0 { + return 1 + } + + // ppc64:"ANDCC",-"CMP" + // ppc64le:"ANDCC",-"CMP" + // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64" + if d&255 == 0 { + return 1 + } + + // ppc64:"ANDCC",-"CMP" + // ppc64le:"ANDCC",-"CMP" + // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64" + if d&e == 0 { + return 1 + } + // ppc64:"ORCC",-"CMP" + // ppc64le:"ORCC",-"CMP" + // wasm:"I64Eqz",-"I32Eqz",-"I64ExtendI32U",-"I32WrapI64" + if d|e == 0 { + return 1 + } + + // ppc64:"XORCC",-"CMP" + // ppc64le:"XORCC",-"CMP" + // wasm:"I64Eqz","I32Eqz",-"I64ExtendI32U",-"I32WrapI64" + if e^d == 0 { + return 1 + } + return 0 +} + +// The following CmpToZero_ex* check that cmp|cmn with bmi|bpl are generated for +// 'comparing to zero' expressions + +// var + const +// 'x-const' might be canonicalized to 'x+(-const)', so we check both +// CMN and CMP for subtraction expressions to make the pattern robust. 
+func CmpToZero_ex1(a int64, e int32) int { + // arm64:`CMN`,-`ADD`,`(BMI|BPL)` + if a+3 < 0 { + return 1 + } + + // arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)` + if a+5 <= 0 { + return 1 + } + + // arm64:`CMN`,-`ADD`,`(BMI|BPL)` + if a+13 >= 0 { + return 2 + } + + // arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)` + if a-7 < 0 { + return 3 + } + + // arm64:`SUB`,`TBZ` + if a-11 >= 0 { + return 4 + } + + // arm64:`SUB`,`CMP`,`BGT` + if a-19 > 0 { + return 4 + } + + // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)` + // arm:`CMN`,-`ADD`,`(BMI|BPL)` + if e+3 < 0 { + return 5 + } + + // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)` + // arm:`CMN`,-`ADD`,`(BMI|BPL)` + if e+13 >= 0 { + return 6 + } + + // arm64:`CMPW|CMNW`,`(BMI|BPL)` + // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)` + if e-7 < 0 { + return 7 + } + + // arm64:`SUB`,`TBNZ` + // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)` + if e-11 >= 0 { + return 8 + } + + return 0 +} + +// var + var +// TODO: optimize 'var - var' +func CmpToZero_ex2(a, b, c int64, e, f, g int32) int { + // arm64:`CMN`,-`ADD`,`(BMI|BPL)` + if a+b < 0 { + return 1 + } + + // arm64:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)` + if a+c <= 0 { + return 1 + } + + // arm64:`CMN`,-`ADD`,`(BMI|BPL)` + if b+c >= 0 { + return 2 + } + + // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)` + // arm:`CMN`,-`ADD`,`(BMI|BPL)` + if e+f < 0 { + return 5 + } + + // arm64:`CMNW`,-`ADDW`,`(BMI|BPL)` + // arm:`CMN`,-`ADD`,`(BMI|BPL)` + if f+g >= 0 { + return 6 + } + return 0 +} + +// var + var*var +func CmpToZero_ex3(a, b, c, d int64, e, f, g, h int32) int { + // arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)` + if a+b*c < 0 { + return 1 + } + + // arm64:`CMN`,-`MADD`,`MUL`,`(BMI|BPL)` + if b+c*d >= 0 { + return 2 + } + + // arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)` + // arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)` + if e+f*g > 0 { + return 5 + } + + // arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)` + // arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)` + if f+g*h <= 0 { + return 6 + } + return 0 +} + +// var - var*var +func CmpToZero_ex4(a, b, c, d 
int64, e, f, g, h int32) int { + // arm64:`CMP`,-`MSUB`,`MUL`,`BEQ`,`(BMI|BPL)` + if a-b*c > 0 { + return 1 + } + + // arm64:`CMP`,-`MSUB`,`MUL`,`(BMI|BPL)` + if b-c*d >= 0 { + return 2 + } + + // arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)` + if e-f*g < 0 { + return 5 + } + + // arm64:`CMPW`,-`MSUBW`,`MULW`,`(BMI|BPL)` + if f-g*h >= 0 { + return 6 + } + return 0 +} + +func CmpToZero_ex5(e, f int32, u uint32) int { + // arm:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)` + if e+f<<1 > 0 { + return 1 + } + + // arm:`CMP`,-`SUB`,`(BMI|BPL)` + if f-int32(u>>2) >= 0 { + return 2 + } + return 0 +} +func UintLtZero(a uint8, b uint16, c uint32, d uint64) int { + // amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCC|JCS)` + // arm64: -`(CMPW|CMP|BHS|BLO)` + if a < 0 || b < 0 || c < 0 || d < 0 { + return 1 + } + return 0 +} + +func UintGeqZero(a uint8, b uint16, c uint32, d uint64) int { + // amd64: -`(TESTB|TESTW|TESTL|TESTQ|JCS|JCC)` + // arm64: -`(CMPW|CMP|BLO|BHS)` + if a >= 0 || b >= 0 || c >= 0 || d >= 0 { + return 1 + } + return 0 +} + +func UintGtZero(a uint8, b uint16, c uint32, d uint64) int { + // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLS|BHI)` + if a > 0 || b > 0 || c > 0 || d > 0 { + return 1 + } + return 0 +} + +func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int { + // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHI|BLS)` + if a <= 0 || b <= 0 || c <= 0 || d <= 0 { + return 1 + } + return 0 +} + +func UintLtOne(a uint8, b uint16, c uint32, d uint64) int { + // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHS|BLO)` + if a < 1 || b < 1 || c < 1 || d < 1 { + return 1 + } + return 0 +} + +func UintGeqOne(a uint8, b uint16, c uint32, d uint64) int { + // arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLO|BHS)` + if a >= 1 || b >= 1 || c >= 1 || d >= 1 { + return 1 + } + return 0 +} + +func CmpToZeroU_ex1(a uint8, b uint16, c uint32, d uint64) int { + // wasm:"I64Eqz",-"I64LtU" + if 0 < a { + return 1 + } + // wasm:"I64Eqz",-"I64LtU" + if 0 < b { + return 1 + } + // 
wasm:"I64Eqz",-"I64LtU" + if 0 < c { + return 1 + } + // wasm:"I64Eqz",-"I64LtU" + if 0 < d { + return 1 + } + return 0 +} + +func CmpToZeroU_ex2(a uint8, b uint16, c uint32, d uint64) int { + // wasm:"I64Eqz",-"I64LeU" + if a <= 0 { + return 1 + } + // wasm:"I64Eqz",-"I64LeU" + if b <= 0 { + return 1 + } + // wasm:"I64Eqz",-"I64LeU" + if c <= 0 { + return 1 + } + // wasm:"I64Eqz",-"I64LeU" + if d <= 0 { + return 1 + } + return 0 +} + +func CmpToOneU_ex1(a uint8, b uint16, c uint32, d uint64) int { + // wasm:"I64Eqz",-"I64LtU" + if a < 1 { + return 1 + } + // wasm:"I64Eqz",-"I64LtU" + if b < 1 { + return 1 + } + // wasm:"I64Eqz",-"I64LtU" + if c < 1 { + return 1 + } + // wasm:"I64Eqz",-"I64LtU" + if d < 1 { + return 1 + } + return 0 +} + +func CmpToOneU_ex2(a uint8, b uint16, c uint32, d uint64) int { + // wasm:"I64Eqz",-"I64LeU" + if 1 <= a { + return 1 + } + // wasm:"I64Eqz",-"I64LeU" + if 1 <= b { + return 1 + } + // wasm:"I64Eqz",-"I64LeU" + if 1 <= c { + return 1 + } + // wasm:"I64Eqz",-"I64LeU" + if 1 <= d { + return 1 + } + return 0 +} + +// Check that small memequals are replaced with eq instructions + +func equalConstString1() bool { + a := string("A") + b := string("Z") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a == b +} + +func equalVarString1(a string) bool { + b := string("Z") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a[:1] == b +} + +func equalConstString2() bool { + a := string("AA") + b := string("ZZ") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a == b +} + +func equalVarString2(a string) bool { + b := string("ZZ") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a[:2] == b +} + +func equalConstString4() bool { + a := string("AAAA") + b := string("ZZZZ") + // amd64:-".*memequal" + // 
arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a == b +} + +func equalVarString4(a string) bool { + b := string("ZZZZ") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a[:4] == b +} + +func equalConstString8() bool { + a := string("AAAAAAAA") + b := string("ZZZZZZZZ") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a == b +} + +func equalVarString8(a string) bool { + b := string("ZZZZZZZZ") + // amd64:-".*memequal" + // arm64:-".*memequal" + // ppc64:-".*memequal" + // ppc64le:-".*memequal" + return a[:8] == b +} + +func cmpToCmn(a, b, c, d int) int { + var c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 int + // arm64:`CMN`,-`CMP` + if a < -8 { + c1 = 1 + } + // arm64:`CMN`,-`CMP` + if a+1 == 0 { + c2 = 1 + } + // arm64:`CMN`,-`CMP` + if a+3 != 0 { + c3 = 1 + } + // arm64:`CMN`,-`CMP` + if a+b == 0 { + c4 = 1 + } + // arm64:`CMN`,-`CMP` + if b+c != 0 { + c5 = 1 + } + // arm64:`CMN`,-`CMP` + if a == -c { + c6 = 1 + } + // arm64:`CMN`,-`CMP` + if b != -d { + c7 = 1 + } + // arm64:`CMN`,-`CMP` + if a*b+c == 0 { + c8 = 1 + } + // arm64:`CMN`,-`CMP` + if a*c+b != 0 { + c9 = 1 + } + // arm64:`CMP`,-`CMN` + if b*c-a == 0 { + c10 = 1 + } + // arm64:`CMP`,-`CMN` + if a*d-b != 0 { + c11 = 1 + } + return c1 + c2 + c3 + c4 + c5 + c6 + c7 + c8 + c9 + c10 + c11 +} diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go new file mode 100644 index 0000000..7b0f32e --- /dev/null +++ b/test/codegen/condmove.go @@ -0,0 +1,466 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +func cmovint(c int) int { + x := c + 4 + if x < 0 { + x = 182 + } + // amd64:"CMOVQLT" + // arm64:"CSEL\tLT" + // ppc64:"ISEL\t[$]0" + // ppc64le:"ISEL\t[$]0" + // wasm:"Select" + return x +} + +func cmovchan(x, y chan int) chan int { + if x != y { + x = y + } + // amd64:"CMOVQNE" + // arm64:"CSEL\tNE" + // ppc64:"ISEL\t[$]2" + // ppc64le:"ISEL\t[$]2" + // wasm:"Select" + return x +} + +func cmovuintptr(x, y uintptr) uintptr { + if x < y { + x = -y + } + // amd64:"CMOVQ(HI|CS)" + // arm64:"CSNEG\tLS" + // ppc64:"ISEL\t[$]1" + // ppc64le:"ISEL\t[$]1" + // wasm:"Select" + return x +} + +func cmov32bit(x, y uint32) uint32 { + if x < y { + x = -y + } + // amd64:"CMOVL(HI|CS)" + // arm64:"CSNEG\t(LS|HS)" + // ppc64:"ISEL\t[$]1" + // ppc64le:"ISEL\t[$]1" + // wasm:"Select" + return x +} + +func cmov16bit(x, y uint16) uint16 { + if x < y { + x = -y + } + // amd64:"CMOVW(HI|CS)" + // arm64:"CSNEG\t(LS|HS)" + // ppc64:"ISEL\t[$]0" + // ppc64le:"ISEL\t[$]0" + // wasm:"Select" + return x +} + +// Floating point comparison. For EQ/NE, we must +// generate special code to handle NaNs. +func cmovfloateq(x, y float64) int { + a := 128 + if x == y { + a = 256 + } + // amd64:"CMOVQNE","CMOVQPC" + // arm64:"CSEL\tEQ" + // ppc64:"ISEL\t[$]2" + // ppc64le:"ISEL\t[$]2" + // wasm:"Select" + return a +} + +func cmovfloatne(x, y float64) int { + a := 128 + if x != y { + a = 256 + } + // amd64:"CMOVQNE","CMOVQPS" + // arm64:"CSEL\tNE" + // ppc64:"ISEL\t[$]2" + // ppc64le:"ISEL\t[$]2" + // wasm:"Select" + return a +} + +//go:noinline +func frexp(f float64) (frac float64, exp int) { + return 1.0, 4 +} + +//go:noinline +func ldexp(frac float64, exp int) float64 { + return 1.0 +} + +// Generate a CMOV with a floating comparison and integer move. 
+func cmovfloatint2(x, y float64) float64 { + yfr, yexp := 4.0, 5 + + r := x + for r >= y { + rfr, rexp := frexp(r) + if rfr < yfr { + rexp = rexp - 1 + } + // amd64:"CMOVQHI" + // arm64:"CSEL\tMI" + // ppc64:"ISEL\t[$]0" + // ppc64le:"ISEL\t[$]0" + // wasm:"Select" + r = r - ldexp(y, rexp-yexp) + } + return r +} + +func cmovloaded(x [4]int, y int) int { + if x[2] != 0 { + y = x[2] + } else { + y = y >> 2 + } + // amd64:"CMOVQNE" + // arm64:"CSEL\tNE" + // ppc64:"ISEL\t[$]2" + // ppc64le:"ISEL\t[$]2" + // wasm:"Select" + return y +} + +func cmovuintptr2(x, y uintptr) uintptr { + a := x * 2 + if a == 0 { + a = 256 + } + // amd64:"CMOVQEQ" + // arm64:"CSEL\tEQ" + // ppc64:"ISEL\t[$]2" + // ppc64le:"ISEL\t[$]2" + // wasm:"Select" + return a +} + +// Floating point CMOVs are not supported by amd64/arm64/ppc64/ppc64le +func cmovfloatmove(x, y int) float64 { + a := 1.0 + if x <= y { + a = 2.0 + } + // amd64:-"CMOV" + // arm64:-"CSEL" + // ppc64:-"ISEL" + // ppc64le:-"ISEL" + // wasm:-"Select" + return a +} + +// On amd64, the following patterns trigger comparison inversion. 
+// Test that we correctly invert the CMOV condition +var gsink int64 +var gusink uint64 + +func cmovinvert1(x, y int64) int64 { + if x < gsink { + y = -y + } + // amd64:"CMOVQGT" + return y +} +func cmovinvert2(x, y int64) int64 { + if x <= gsink { + y = -y + } + // amd64:"CMOVQGE" + return y +} +func cmovinvert3(x, y int64) int64 { + if x == gsink { + y = -y + } + // amd64:"CMOVQEQ" + return y +} +func cmovinvert4(x, y int64) int64 { + if x != gsink { + y = -y + } + // amd64:"CMOVQNE" + return y +} +func cmovinvert5(x, y uint64) uint64 { + if x > gusink { + y = -y + } + // amd64:"CMOVQCS" + return y +} +func cmovinvert6(x, y uint64) uint64 { + if x >= gusink { + y = -y + } + // amd64:"CMOVQLS" + return y +} + +func cmovload(a []int, i int, b bool) int { + if b { + i++ + } + // See issue 26306 + // amd64:-"CMOVQNE" + return a[i] +} + +func cmovstore(a []int, i int, b bool) { + if b { + i++ + } + // amd64:"CMOVQNE" + a[i] = 7 +} + +var r0, r1, r2, r3, r4, r5 int + +func cmovinc(cond bool, a, b, c int) { + var x0, x1 int + + if cond { + x0 = a + } else { + x0 = b + 1 + } + // arm64:"CSINC\tNE", -"CSEL" + r0 = x0 + + if cond { + x1 = b + 1 + } else { + x1 = a + } + // arm64:"CSINC\tEQ", -"CSEL" + r1 = x1 + + if cond { + c++ + } + // arm64:"CSINC\tEQ", -"CSEL" + r2 = c +} + +func cmovinv(cond bool, a, b int) { + var x0, x1 int + + if cond { + x0 = a + } else { + x0 = ^b + } + // arm64:"CSINV\tNE", -"CSEL" + r0 = x0 + + if cond { + x1 = ^b + } else { + x1 = a + } + // arm64:"CSINV\tEQ", -"CSEL" + r1 = x1 +} + +func cmovneg(cond bool, a, b, c int) { + var x0, x1 int + + if cond { + x0 = a + } else { + x0 = -b + } + // arm64:"CSNEG\tNE", -"CSEL" + r0 = x0 + + if cond { + x1 = -b + } else { + x1 = a + } + // arm64:"CSNEG\tEQ", -"CSEL" + r1 = x1 +} + +func cmovsetm(cond bool, x int) { + var x0, x1 int + + if cond { + x0 = -1 + } else { + x0 = 0 + } + // arm64:"CSETM\tNE", -"CSEL" + r0 = x0 + + if cond { + x1 = 0 + } else { + x1 = -1 + } + // arm64:"CSETM\tEQ", -"CSEL" + r1 
= x1 +} + +func cmovFcmp0(s, t float64, a, b int) { + var x0, x1, x2, x3, x4, x5 int + + if s < t { + x0 = a + } else { + x0 = b + 1 + } + // arm64:"CSINC\tMI", -"CSEL" + r0 = x0 + + if s <= t { + x1 = a + } else { + x1 = ^b + } + // arm64:"CSINV\tLS", -"CSEL" + r1 = x1 + + if s > t { + x2 = a + } else { + x2 = -b + } + // arm64:"CSNEG\tMI", -"CSEL" + r2 = x2 + + if s >= t { + x3 = -1 + } else { + x3 = 0 + } + // arm64:"CSETM\tLS", -"CSEL" + r3 = x3 + + if s == t { + x4 = a + } else { + x4 = b + 1 + } + // arm64:"CSINC\tEQ", -"CSEL" + r4 = x4 + + if s != t { + x5 = a + } else { + x5 = b + 1 + } + // arm64:"CSINC\tNE", -"CSEL" + r5 = x5 +} + +func cmovFcmp1(s, t float64, a, b int) { + var x0, x1, x2, x3, x4, x5 int + + if s < t { + x0 = b + 1 + } else { + x0 = a + } + // arm64:"CSINC\tPL", -"CSEL" + r0 = x0 + + if s <= t { + x1 = ^b + } else { + x1 = a + } + // arm64:"CSINV\tHI", -"CSEL" + r1 = x1 + + if s > t { + x2 = -b + } else { + x2 = a + } + // arm64:"CSNEG\tPL", -"CSEL" + r2 = x2 + + if s >= t { + x3 = 0 + } else { + x3 = -1 + } + // arm64:"CSETM\tHI", -"CSEL" + r3 = x3 + + if s == t { + x4 = b + 1 + } else { + x4 = a + } + // arm64:"CSINC\tNE", -"CSEL" + r4 = x4 + + if s != t { + x5 = b + 1 + } else { + x5 = a + } + // arm64:"CSINC\tEQ", -"CSEL" + r5 = x5 +} + +func cmovzero1(c bool) int { + var x int + if c { + x = 182 + } + // loong64:"MASKEQZ", -"MASKNEZ" + return x +} + +func cmovzero2(c bool) int { + var x int + if !c { + x = 182 + } + // loong64:"MASKNEZ", -"MASKEQZ" + return x +} + +// Conditionally selecting between a value or 0 can be done without +// an extra load of 0 to a register on PPC64 by using R0 (which always +// holds the value $0) instead. Verify both cases where either arg1 +// or arg2 is zero. 
+func cmovzeroreg0(a, b int) int { + x := 0 + if a == b { + x = a + } + // ppc64:"ISEL\t[$]2, R[0-9]+, R0, R[0-9]+" + // ppc64le:"ISEL\t[$]2, R[0-9]+, R0, R[0-9]+" + return x +} + +func cmovzeroreg1(a, b int) int { + x := a + if a == b { + x = 0 + } + // ppc64:"ISEL\t[$]2, R0, R[0-9]+, R[0-9]+" + // ppc64le:"ISEL\t[$]2, R0, R[0-9]+, R[0-9]+" + return x +} diff --git a/test/codegen/copy.go b/test/codegen/copy.go new file mode 100644 index 0000000..9b3bf75 --- /dev/null +++ b/test/codegen/copy.go @@ -0,0 +1,168 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +import "runtime" + +// Check small copies are replaced with moves. + +func movesmall4() { + x := [...]byte{1, 2, 3, 4} + // 386:-".*memmove" + // amd64:-".*memmove" + // arm:-".*memmove" + // arm64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + copy(x[1:], x[:]) +} + +func movesmall7() { + x := [...]byte{1, 2, 3, 4, 5, 6, 7} + // 386:-".*memmove" + // amd64:-".*memmove" + // arm64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + copy(x[1:], x[:]) +} + +func movesmall16() { + x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + // amd64:-".*memmove" + // ppc64:".*memmove" + // ppc64le:".*memmove" + copy(x[1:], x[:]) +} + +var x [256]byte + +// Check that large disjoint copies are replaced with moves. 
+ +func moveDisjointStack32() { + var s [32]byte + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + // ppc64le/power8:"LXVD2X",-"ADD",-"BC" + // ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC" + copy(s[:], x[:32]) + runtime.KeepAlive(&s) +} + +func moveDisjointStack64() { + var s [96]byte + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + // ppc64le/power8:"LXVD2X","ADD","BC" + // ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC" + copy(s[:], x[:96]) + runtime.KeepAlive(&s) +} + +func moveDisjointStack() { + var s [256]byte + // s390x:-".*memmove" + // amd64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + // ppc64le/power8:"LXVD2X" + // ppc64le/power9:"LXV",-"LXVD2X" + copy(s[:], x[:]) + runtime.KeepAlive(&s) +} + +func moveDisjointArg(b *[256]byte) { + var s [256]byte + // s390x:-".*memmove" + // amd64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + // ppc64le/power8:"LXVD2X" + // ppc64le/power9:"LXV",-"LXVD2X" + copy(s[:], b[:]) + runtime.KeepAlive(&s) +} + +func moveDisjointNoOverlap(a *[256]byte) { + // s390x:-".*memmove" + // amd64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + // ppc64le/power8:"LXVD2X" + // ppc64le/power9:"LXV",-"LXVD2X" + copy(a[:], a[128:]) +} + +// Check arch-specific memmove lowering. 
See issue 41662 for details + +func moveArchLowering1(b []byte, x *[1]byte) { + _ = b[1] + // amd64:-".*memmove" + // arm64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + copy(b, x[:]) +} + +func moveArchLowering2(b []byte, x *[2]byte) { + _ = b[2] + // amd64:-".*memmove" + // arm64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + copy(b, x[:]) +} + +func moveArchLowering4(b []byte, x *[4]byte) { + _ = b[4] + // amd64:-".*memmove" + // arm64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + copy(b, x[:]) +} + +func moveArchLowering8(b []byte, x *[8]byte) { + _ = b[8] + // amd64:-".*memmove" + // arm64:-".*memmove" + // ppc64:-".*memmove" + // ppc64le:-".*memmove" + copy(b, x[:]) +} + +func moveArchLowering16(b []byte, x *[16]byte) { + _ = b[16] + // amd64:-".*memmove" + copy(b, x[:]) +} + +// Check that no branches are generated when the pointers are [not] equal. + +func ptrEqual() { + // amd64:-"JEQ",-"JNE" + // ppc64:-"BEQ",-"BNE" + // ppc64le:-"BEQ",-"BNE" + // s390x:-"BEQ",-"BNE" + copy(x[:], x[:]) +} + +func ptrOneOffset() { + // amd64:-"JEQ",-"JNE" + // ppc64:-"BEQ",-"BNE" + // ppc64le:-"BEQ",-"BNE" + // s390x:-"BEQ",-"BNE" + copy(x[1:], x[:]) +} + +func ptrBothOffset() { + // amd64:-"JEQ",-"JNE" + // ppc64:-"BEQ",-"BNE" + // ppc64le:-"BEQ",-"BNE" + // s390x:-"BEQ",-"BNE" + copy(x[1:], x[2:]) +} diff --git a/test/codegen/floats.go b/test/codegen/floats.go new file mode 100644 index 0000000..397cbb8 --- /dev/null +++ b/test/codegen/floats.go @@ -0,0 +1,156 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// This file contains codegen tests related to arithmetic +// simplifications and optimizations on float types. +// For codegen tests on integer types, see arithmetic.go. 
+ +// --------------------- // +// Strength-reduce // +// --------------------- // + +func Mul2(f float64) float64 { + // 386/sse2:"ADDSD",-"MULSD" + // amd64:"ADDSD",-"MULSD" + // arm/7:"ADDD",-"MULD" + // arm64:"FADDD",-"FMULD" + // ppc64:"FADD",-"FMUL" + // ppc64le:"FADD",-"FMUL" + return f * 2.0 +} + +func DivPow2(f1, f2, f3 float64) (float64, float64, float64) { + // 386/sse2:"MULSD",-"DIVSD" + // amd64:"MULSD",-"DIVSD" + // arm/7:"MULD",-"DIVD" + // arm64:"FMULD",-"FDIVD" + // ppc64:"FMUL",-"FDIV" + // ppc64le:"FMUL",-"FDIV" + x := f1 / 16.0 + + // 386/sse2:"MULSD",-"DIVSD" + // amd64:"MULSD",-"DIVSD" + // arm/7:"MULD",-"DIVD" + // arm64:"FMULD",-"FDIVD" + // ppc64:"FMUL",-"FDIVD" + // ppc64le:"FMUL",-"FDIVD" + y := f2 / 0.125 + + // 386/sse2:"ADDSD",-"DIVSD",-"MULSD" + // amd64:"ADDSD",-"DIVSD",-"MULSD" + // arm/7:"ADDD",-"MULD",-"DIVD" + // arm64:"FADDD",-"FMULD",-"FDIVD" + // ppc64:"FADD",-"FMUL",-"FDIV" + // ppc64le:"FADD",-"FMUL",-"FDIV" + z := f3 / 0.5 + + return x, y, z +} + +func indexLoad(b0 []float32, b1 float32, idx int) float32 { + // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+` + return b0[idx] * b1 +} + +func indexStore(b0 []float64, b1 float64, idx int) { + // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)` + b0[idx] = b1 +} + +// ----------- // +// Fused // +// ----------- // + +func FusedAdd32(x, y, z float32) float32 { + // s390x:"FMADDS\t" + // ppc64:"FMADDS\t" + // ppc64le:"FMADDS\t" + // arm64:"FMADDS" + return x*y + z +} + +func FusedSub32_a(x, y, z float32) float32 { + // s390x:"FMSUBS\t" + // ppc64:"FMSUBS\t" + // ppc64le:"FMSUBS\t" + return x*y - z +} + +func FusedSub32_b(x, y, z float32) float32 { + // arm64:"FMSUBS" + return z - x*y +} + +func FusedAdd64(x, y, z float64) float64 { + // s390x:"FMADD\t" + // ppc64:"FMADD\t" + // ppc64le:"FMADD\t" + // arm64:"FMADDD" + return x*y + z +} + +func FusedSub64_a(x, y, z float64) float64 { + // s390x:"FMSUB\t" + // ppc64:"FMSUB\t" + // ppc64le:"FMSUB\t" + return x*y - z +} + +func 
FusedSub64_b(x, y, z float64) float64 { + // arm64:"FMSUBD" + return z - x*y +} + +func Cmp(f float64) bool { + // arm64:"FCMPD","(BGT|BLE|BMI|BPL)",-"CSET\tGT",-"CBZ" + return f > 4 || f < -4 +} + +func CmpZero64(f float64) bool { + // s390x:"LTDBR",-"FCMPU" + return f <= 0 +} + +func CmpZero32(f float32) bool { + // s390x:"LTEBR",-"CEBR" + return f <= 0 +} + +func CmpWithSub(a float64, b float64) bool { + f := a - b + // s390x:-"LTDBR" + return f <= 0 +} + +func CmpWithAdd(a float64, b float64) bool { + f := a + b + // s390x:-"LTDBR" + return f <= 0 +} + +// ---------------- // +// Non-floats // +// ---------------- // + +// We should make sure that the compiler doesn't generate floating point +// instructions for non-float operations on Plan 9, because floating point +// operations are not allowed in the note handler. + +func ArrayZero() [16]byte { + // amd64:"MOVUPS" + // plan9/amd64/:-"MOVUPS" + var a [16]byte + return a +} + +func ArrayCopy(a [16]byte) (b [16]byte) { + // amd64:"MOVUPS" + // plan9/amd64/:-"MOVUPS" + b = a + return +} diff --git a/test/codegen/fuse.go b/test/codegen/fuse.go new file mode 100644 index 0000000..79dd337 --- /dev/null +++ b/test/codegen/fuse.go @@ -0,0 +1,197 @@ +// asmcheck + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +// Notes: +// - these examples use channels to provide a source of +// unknown values that cannot be optimized away +// - these examples use for loops to force branches +// backward (predicted taken) + +// ---------------------------------- // +// signed integer range (conjunction) // +// ---------------------------------- // + +func si1c(c <-chan int64) { + // amd64:"CMPQ\t.+, [$]256" + // s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255" + for x := <-c; x >= 0 && x < 256; x = <-c { + } +} + +func si2c(c <-chan int32) { + // amd64:"CMPL\t.+, [$]256" + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255" + for x := <-c; x >= 0 && x < 256; x = <-c { + } +} + +func si3c(c <-chan int16) { + // amd64:"CMPW\t.+, [$]256" + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255" + for x := <-c; x >= 0 && x < 256; x = <-c { + } +} + +func si4c(c <-chan int8) { + // amd64:"CMPB\t.+, [$]10" + // s390x:"CLIJ\t[$]4, R[0-9]+, [$]10" + for x := <-c; x >= 0 && x < 10; x = <-c { + } +} + +func si5c(c <-chan int64) { + // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5," + // s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5," + for x := <-c; x < 256 && x > 4; x = <-c { + } +} + +func si6c(c <-chan int32) { + // amd64:"CMPL\t.+, [$]255","DECL\t" + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1," + for x := <-c; x > 0 && x <= 256; x = <-c { + } +} + +func si7c(c <-chan int16) { + // amd64:"CMPW\t.+, [$]60","ADDL\t[$]10," + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]60","ADDW\t[$]10," + for x := <-c; x >= -10 && x <= 50; x = <-c { + } +} + +func si8c(c <-chan int8) { + // amd64:"CMPB\t.+, [$]126","ADDL\t[$]126," + // s390x:"CLIJ\t[$]4, R[0-9]+, [$]126","ADDW\t[$]126," + for x := <-c; x >= -126 && x < 0; x = <-c { + } +} + +// ---------------------------------- // +// signed integer range (disjunction) // +// ---------------------------------- // + +func si1d(c <-chan int64) { + // amd64:"CMPQ\t.+, [$]256" + // s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255" + for x := <-c; x < 0 || x >= 256; x = <-c { + } +} + +func si2d(c 
<-chan int32) { + // amd64:"CMPL\t.+, [$]256" + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255" + for x := <-c; x < 0 || x >= 256; x = <-c { + } +} + +func si3d(c <-chan int16) { + // amd64:"CMPW\t.+, [$]256" + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255" + for x := <-c; x < 0 || x >= 256; x = <-c { + } +} + +func si4d(c <-chan int8) { + // amd64:"CMPB\t.+, [$]10" + // s390x:"CLIJ\t[$]10, R[0-9]+, [$]10" + for x := <-c; x < 0 || x >= 10; x = <-c { + } +} + +func si5d(c <-chan int64) { + // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5," + // s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5," + for x := <-c; x >= 256 || x <= 4; x = <-c { + } +} + +func si6d(c <-chan int32) { + // amd64:"CMPL\t.+, [$]255","DECL\t" + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]255","ADDW\t[$]-1," + for x := <-c; x <= 0 || x > 256; x = <-c { + } +} + +func si7d(c <-chan int16) { + // amd64:"CMPW\t.+, [$]60","ADDL\t[$]10," + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]60","ADDW\t[$]10," + for x := <-c; x < -10 || x > 50; x = <-c { + } +} + +func si8d(c <-chan int8) { + // amd64:"CMPB\t.+, [$]126","ADDL\t[$]126," + // s390x:"CLIJ\t[$]10, R[0-9]+, [$]126","ADDW\t[$]126," + for x := <-c; x < -126 || x >= 0; x = <-c { + } +} + +// ------------------------------------ // +// unsigned integer range (conjunction) // +// ------------------------------------ // + +func ui1c(c <-chan uint64) { + // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5," + // s390x:"CLGIJ\t[$]4, R[0-9]+, [$]251","ADD\t[$]-5," + for x := <-c; x < 256 && x > 4; x = <-c { + } +} + +func ui2c(c <-chan uint32) { + // amd64:"CMPL\t.+, [$]255","DECL\t" + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]255","ADDW\t[$]-1," + for x := <-c; x > 0 && x <= 256; x = <-c { + } +} + +func ui3c(c <-chan uint16) { + // amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10," + // s390x:"CLIJ\t[$]12, R[0-9]+, [$]40","ADDW\t[$]-10," + for x := <-c; x >= 10 && x <= 50; x = <-c { + } +} + +func ui4c(c <-chan uint8) { + // amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126," + // s390x:"CLIJ\t[$]4, R[0-9]+, [$]2","ADDW\t[$]-126," + for 
x := <-c; x >= 126 && x < 128; x = <-c { + } +} + +// ------------------------------------ // +// unsigned integer range (disjunction) // +// ------------------------------------ // + +func ui1d(c <-chan uint64) { + // amd64:"CMPQ\t.+, [$]251","ADDQ\t[$]-5," + // s390x:"CLGIJ\t[$]10, R[0-9]+, [$]251","ADD\t[$]-5," + for x := <-c; x >= 256 || x <= 4; x = <-c { + } +} + +func ui2d(c <-chan uint32) { + // amd64:"CMPL\t.+, [$]254","ADDL\t[$]-2," + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]254","ADDW\t[$]-2," + for x := <-c; x <= 1 || x > 256; x = <-c { + } +} + +func ui3d(c <-chan uint16) { + // amd64:"CMPW\t.+, [$]40","ADDL\t[$]-10," + // s390x:"CLIJ\t[$]2, R[0-9]+, [$]40","ADDW\t[$]-10," + for x := <-c; x < 10 || x > 50; x = <-c { + } +} + +func ui4d(c <-chan uint8) { + // amd64:"CMPB\t.+, [$]2","ADDL\t[$]-126," + // s390x:"CLIJ\t[$]10, R[0-9]+, [$]2","ADDW\t[$]-126," + for x := <-c; x < 126 || x >= 128; x = <-c { + } +} diff --git a/test/codegen/ifaces.go b/test/codegen/ifaces.go new file mode 100644 index 0000000..d773845 --- /dev/null +++ b/test/codegen/ifaces.go @@ -0,0 +1,21 @@ +// asmcheck + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +type I interface { M() } + +func NopConvertIface(x I) I { + // amd64:-`.*runtime.convI2I` + return I(x) +} + +func NopConvertGeneric[T any](x T) T { + // amd64:-`.*runtime.convI2I` + return T(x) +} + +var NopConvertGenericIface = NopConvertGeneric[I] diff --git a/test/codegen/issue22703.go b/test/codegen/issue22703.go new file mode 100644 index 0000000..0201de6 --- /dev/null +++ b/test/codegen/issue22703.go @@ -0,0 +1,535 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +type I interface { + foo000() + foo001() + foo002() + foo003() + foo004() + foo005() + foo006() + foo007() + foo008() + foo009() + foo010() + foo011() + foo012() + foo013() + foo014() + foo015() + foo016() + foo017() + foo018() + foo019() + foo020() + foo021() + foo022() + foo023() + foo024() + foo025() + foo026() + foo027() + foo028() + foo029() + foo030() + foo031() + foo032() + foo033() + foo034() + foo035() + foo036() + foo037() + foo038() + foo039() + foo040() + foo041() + foo042() + foo043() + foo044() + foo045() + foo046() + foo047() + foo048() + foo049() + foo050() + foo051() + foo052() + foo053() + foo054() + foo055() + foo056() + foo057() + foo058() + foo059() + foo060() + foo061() + foo062() + foo063() + foo064() + foo065() + foo066() + foo067() + foo068() + foo069() + foo070() + foo071() + foo072() + foo073() + foo074() + foo075() + foo076() + foo077() + foo078() + foo079() + foo080() + foo081() + foo082() + foo083() + foo084() + foo085() + foo086() + foo087() + foo088() + foo089() + foo090() + foo091() + foo092() + foo093() + foo094() + foo095() + foo096() + foo097() + foo098() + foo099() + foo100() + foo101() + foo102() + foo103() + foo104() + foo105() + foo106() + foo107() + foo108() + foo109() + foo110() + foo111() + foo112() + foo113() + foo114() + foo115() + foo116() + foo117() + foo118() + foo119() + foo120() + foo121() + foo122() + foo123() + foo124() + foo125() + foo126() + foo127() + foo128() + foo129() + foo130() + foo131() + foo132() + foo133() + foo134() + foo135() + foo136() + foo137() + foo138() + foo139() + foo140() + foo141() + foo142() + foo143() + foo144() + foo145() + foo146() + foo147() + foo148() + foo149() + foo150() + foo151() + foo152() + foo153() + foo154() + foo155() + foo156() + foo157() + foo158() + foo159() + foo160() + foo161() + foo162() + foo163() + foo164() + foo165() + foo166() + foo167() + foo168() + foo169() + foo170() + foo171() + foo172() + foo173() + foo174() + foo175() + foo176() + foo177() 
+ foo178() + foo179() + foo180() + foo181() + foo182() + foo183() + foo184() + foo185() + foo186() + foo187() + foo188() + foo189() + foo190() + foo191() + foo192() + foo193() + foo194() + foo195() + foo196() + foo197() + foo198() + foo199() + foo200() + foo201() + foo202() + foo203() + foo204() + foo205() + foo206() + foo207() + foo208() + foo209() + foo210() + foo211() + foo212() + foo213() + foo214() + foo215() + foo216() + foo217() + foo218() + foo219() + foo220() + foo221() + foo222() + foo223() + foo224() + foo225() + foo226() + foo227() + foo228() + foo229() + foo230() + foo231() + foo232() + foo233() + foo234() + foo235() + foo236() + foo237() + foo238() + foo239() + foo240() + foo241() + foo242() + foo243() + foo244() + foo245() + foo246() + foo247() + foo248() + foo249() + foo250() + foo251() + foo252() + foo253() + foo254() + foo255() + foo256() + foo257() + foo258() + foo259() + foo260() + foo261() + foo262() + foo263() + foo264() + foo265() + foo266() + foo267() + foo268() + foo269() + foo270() + foo271() + foo272() + foo273() + foo274() + foo275() + foo276() + foo277() + foo278() + foo279() + foo280() + foo281() + foo282() + foo283() + foo284() + foo285() + foo286() + foo287() + foo288() + foo289() + foo290() + foo291() + foo292() + foo293() + foo294() + foo295() + foo296() + foo297() + foo298() + foo299() + foo300() + foo301() + foo302() + foo303() + foo304() + foo305() + foo306() + foo307() + foo308() + foo309() + foo310() + foo311() + foo312() + foo313() + foo314() + foo315() + foo316() + foo317() + foo318() + foo319() + foo320() + foo321() + foo322() + foo323() + foo324() + foo325() + foo326() + foo327() + foo328() + foo329() + foo330() + foo331() + foo332() + foo333() + foo334() + foo335() + foo336() + foo337() + foo338() + foo339() + foo340() + foo341() + foo342() + foo343() + foo344() + foo345() + foo346() + foo347() + foo348() + foo349() + foo350() + foo351() + foo352() + foo353() + foo354() + foo355() + foo356() + foo357() + foo358() + 
foo359() + foo360() + foo361() + foo362() + foo363() + foo364() + foo365() + foo366() + foo367() + foo368() + foo369() + foo370() + foo371() + foo372() + foo373() + foo374() + foo375() + foo376() + foo377() + foo378() + foo379() + foo380() + foo381() + foo382() + foo383() + foo384() + foo385() + foo386() + foo387() + foo388() + foo389() + foo390() + foo391() + foo392() + foo393() + foo394() + foo395() + foo396() + foo397() + foo398() + foo399() + foo400() + foo401() + foo402() + foo403() + foo404() + foo405() + foo406() + foo407() + foo408() + foo409() + foo410() + foo411() + foo412() + foo413() + foo414() + foo415() + foo416() + foo417() + foo418() + foo419() + foo420() + foo421() + foo422() + foo423() + foo424() + foo425() + foo426() + foo427() + foo428() + foo429() + foo430() + foo431() + foo432() + foo433() + foo434() + foo435() + foo436() + foo437() + foo438() + foo439() + foo440() + foo441() + foo442() + foo443() + foo444() + foo445() + foo446() + foo447() + foo448() + foo449() + foo450() + foo451() + foo452() + foo453() + foo454() + foo455() + foo456() + foo457() + foo458() + foo459() + foo460() + foo461() + foo462() + foo463() + foo464() + foo465() + foo466() + foo467() + foo468() + foo469() + foo470() + foo471() + foo472() + foo473() + foo474() + foo475() + foo476() + foo477() + foo478() + foo479() + foo480() + foo481() + foo482() + foo483() + foo484() + foo485() + foo486() + foo487() + foo488() + foo489() + foo490() + foo491() + foo492() + foo493() + foo494() + foo495() + foo496() + foo497() + foo498() + foo499() + foo500() + foo501() + foo502() + foo503() + foo504() + foo505() + foo506() + foo507() + foo508() + foo509() + foo510() + foo511() +} + +// Nil checks before calling interface methods. +// We need it only when the offset is large. 
+ +func callMethodSmallOffset(i I) { + // amd64:-"TESTB" + i.foo001() +} + +func callMethodLargeOffset(i I) { + // amd64:"TESTB" + i.foo511() +} diff --git a/test/codegen/issue25378.go b/test/codegen/issue25378.go new file mode 100644 index 0000000..810a022 --- /dev/null +++ b/test/codegen/issue25378.go @@ -0,0 +1,22 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +var wsp = [256]bool{ + ' ': true, + '\t': true, + '\n': true, + '\r': true, +} + +func zeroExtArgByte(ch [2]byte) bool { + return wsp[ch[0]] // amd64:-"MOVBLZX\t..,.." +} + +func zeroExtArgUint16(ch [2]uint16) bool { + return wsp[ch[0]] // amd64:-"MOVWLZX\t..,.." +} diff --git a/test/codegen/issue31618.go b/test/codegen/issue31618.go new file mode 100644 index 0000000..8effe29 --- /dev/null +++ b/test/codegen/issue31618.go @@ -0,0 +1,22 @@ +// asmcheck + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// Make sure we remove both inline marks in the following code. +// Both +5 and +6 should map to real instructions, which can +// be used as inline marks instead of explicit nops. +func f(x int) int { + // amd64:-"XCHGL" + x = g(x) + 5 + // amd64:-"XCHGL" + x = g(x) + 6 + return x +} + +func g(x int) int { + return x >> 3 +} diff --git a/test/codegen/issue33580.go b/test/codegen/issue33580.go new file mode 100644 index 0000000..1ded944 --- /dev/null +++ b/test/codegen/issue33580.go @@ -0,0 +1,25 @@ +// asmcheck + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Make sure we reuse large constant loads, if we can. +// See issue 33580. 
+ +package codegen + +const ( + A = 7777777777777777 + B = 8888888888888888 +) + +func f(x, y uint64) uint64 { + p := x & A + q := y & A + r := x & B + // amd64:-"MOVQ.*8888888888888888" + s := y & B + + return p * q * r * s +} diff --git a/test/codegen/issue38554.go b/test/codegen/issue38554.go new file mode 100644 index 0000000..84db847 --- /dev/null +++ b/test/codegen/issue38554.go @@ -0,0 +1,15 @@ +// asmcheck + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test that we are zeroing directly instead of +// copying a large zero value. Issue 38554. + +package codegen + +func retlarge() [256]byte { + // amd64:-"DUFFCOPY" + return [256]byte{} +} diff --git a/test/codegen/issue42610.go b/test/codegen/issue42610.go new file mode 100644 index 0000000..c7eeddc --- /dev/null +++ b/test/codegen/issue42610.go @@ -0,0 +1,30 @@ +// asmcheck + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Don't allow 0 masks in shift lowering rules on ppc64x. +// See issue 42610. + +package codegen + +func f32(a []int32, i uint32) { + g := func(p int32) int32 { + i = uint32(p) * (uint32(p) & (i & 1)) + return 1 + } + // ppc64le: -"RLWNIM" + // ppc64: -"RLWNIM" + a[0] = g(8) >> 1 +} + +func f(a []int, i uint) { + g := func(p int) int { + i = uint(p) * (uint(p) & (i & 1)) + return 1 + } + // ppc64le: -"RLDIC" + // ppc64: -"RLDIC" + a[0] = g(8) >> 1 +} diff --git a/test/codegen/issue48054.go b/test/codegen/issue48054.go new file mode 100644 index 0000000..1f3a041 --- /dev/null +++ b/test/codegen/issue48054.go @@ -0,0 +1,31 @@ +// asmcheck + +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +func a(n string) bool { + // arm64:"CBZ" + if len(n) > 0 { + return true + } + return false +} + +func a2(n []int) bool { + // arm64:"CBZ" + if len(n) > 0 { + return true + } + return false +} + +func a3(n []int) bool { + // amd64:"TESTQ" + if len(n) < 1 { + return true + } + return false +} diff --git a/test/codegen/issue52635.go b/test/codegen/issue52635.go new file mode 100644 index 0000000..0e4d169 --- /dev/null +++ b/test/codegen/issue52635.go @@ -0,0 +1,36 @@ +// asmcheck + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Test that optimized range memclr works with pointers to arrays. + +package codegen + +type T struct { + a *[10]int + b [10]int +} + +func (t *T) f() { + // amd64:".*runtime.memclrNoHeapPointers" + for i := range t.a { + t.a[i] = 0 + } + + // amd64:".*runtime.memclrNoHeapPointers" + for i := range *t.a { + t.a[i] = 0 + } + + // amd64:".*runtime.memclrNoHeapPointers" + for i := range t.a { + (*t.a)[i] = 0 + } + + // amd64:".*runtime.memclrNoHeapPointers" + for i := range *t.a { + (*t.a)[i] = 0 + } +} diff --git a/test/codegen/issue54467.go b/test/codegen/issue54467.go new file mode 100644 index 0000000..d34b327 --- /dev/null +++ b/test/codegen/issue54467.go @@ -0,0 +1,59 @@ +// asmcheck + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +func f1(x *[4]int, y *[4]int) { + // amd64:".*memmove" + *x = *y +} +func f2(x *[4]int, y [4]int) { + // amd64:-".*memmove" + *x = y +} +func f3(x *[4]int, y *[4]int) { + // amd64:-".*memmove" + t := *y + // amd64:-".*memmove" + *x = t +} +func f4(x *[4]int, y [4]int) { + // amd64:-".*memmove" + t := y + // amd64:-".*memmove" + *x = t +} + +type T struct { + a [4]int +} + +func f5(x, y *T) { + // amd64:-".*memmove" + x.a = y.a +} +func f6(x *T, y T) { + // amd64:-".*memmove" + x.a = y.a +} +func f7(x *T, y *[4]int) { + // amd64:-".*memmove" + x.a = *y +} +func f8(x *[4]int, y *T) { + // amd64:-".*memmove" + *x = y.a +} + +func f9(x [][4]int, y [][4]int, i, j int) { + // amd64:-".*memmove" + x[i] = y[j] +} + +func f10() []byte { + // amd64:-".*memmove" + return []byte("aReasonablyBigTestString") +} diff --git a/test/codegen/issue56440.go b/test/codegen/issue56440.go new file mode 100644 index 0000000..c6c1e66 --- /dev/null +++ b/test/codegen/issue56440.go @@ -0,0 +1,34 @@ +// asmcheck + +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Check to make sure that we recognize when the length of an append +// is constant. We check this by making sure that the constant length +// is folded into a load offset. + +package p + +func f(x []int) int { + s := make([]int, 3) + s = append(s, 4, 5) + // amd64:`MOVQ\t40\(.*\),` + return x[len(s)] +} + +func g(x []int, p *bool) int { + s := make([]int, 3) + for { + s = s[:3] + if cap(s) < 5 { + s = make([]int, 3, 5) + } + s = append(s, 4, 5) + if *p { + // amd64:`MOVQ\t40\(.*\),` + return x[len(s)] + } + } + return 0 +} diff --git a/test/codegen/logic.go b/test/codegen/logic.go new file mode 100644 index 0000000..50ce5f0 --- /dev/null +++ b/test/codegen/logic.go @@ -0,0 +1,32 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +var gx, gy int + +// Test to make sure that (CMPQ (ANDQ x y) [0]) does not get rewritten to +// (TESTQ x y) if the ANDQ has other uses. If that rewrite happens, then one +// of the args of the ANDQ needs to be saved so it can be used as the arg to TESTQ. +func andWithUse(x, y int) int { + // Load x,y into registers, so those MOVQ will not appear at the z := x&y line. + gx, gy = x, y + // amd64:-"MOVQ" + z := x & y + if z == 0 { + return 77 + } + // use z by returning it + return z +} + +// Verify (OR x (NOT y)) rewrites to (ORN x y) where supported +func ornot(x, y int) int { + // ppc64:"ORN" + // ppc64le:"ORN" + z := x | ^y + return z +} diff --git a/test/codegen/mapaccess.go b/test/codegen/mapaccess.go new file mode 100644 index 0000000..3d494e7 --- /dev/null +++ b/test/codegen/mapaccess.go @@ -0,0 +1,484 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// These tests check that mapaccess calls are not used. +// Issues #23661 and #24364. 
+ +func mapCompoundAssignmentInt8() { + m := make(map[int8]int8, 0) + var k int8 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] += 67 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] -= 123 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] *= 45 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] |= 78 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] ^= 89 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] <<= 9 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] >>= 10 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]++ + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]-- +} + +func mapCompoundAssignmentInt32() { + m := make(map[int32]int32, 0) + var k int32 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] += 67890 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] -= 123 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] *= 456 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] |= 78 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] ^= 89 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] <<= 9 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // 
arm64:-".*mapaccess" + m[k] >>= 10 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]++ + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]-- +} + +func mapCompoundAssignmentInt64() { + m := make(map[int64]int64, 0) + var k int64 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] += 67890 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] -= 123 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] *= 456 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] |= 78 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] ^= 89 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] <<= 9 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] >>= 10 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]++ + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]-- +} + +func mapCompoundAssignmentComplex128() { + m := make(map[complex128]complex128, 0) + var k complex128 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] += 67890 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] -= 123 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] *= 456 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]++ + + // 386:-".*mapaccess" + // 
amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k]-- +} + +func mapCompoundAssignmentString() { + m := make(map[string]string, 0) + var k string = "key" + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] += "value" +} + +var sinkAppend bool + +func mapAppendAssignmentInt8() { + m := make(map[int8][]int8, 0) + var k int8 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1, 2, 3) + + a := []int8{7, 8, 9, 0} + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], a...) + + // Exceptions + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(a, m[k]...) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + sinkAppend, m[k] = !sinkAppend, append(m[k], 99) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(m[k+1], 100) +} + +func mapAppendAssignmentInt32() { + m := make(map[int32][]int32, 0) + var k int32 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1, 2, 3) + + a := []int32{7, 8, 9, 0} + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], a...) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k+1] = append(m[k+1], a...) 
+ + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[-k] = append(m[-k], a...) + + // Exceptions + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(a, m[k]...) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + sinkAppend, m[k] = !sinkAppend, append(m[k], 99) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(m[k+1], 100) +} + +func mapAppendAssignmentInt64() { + m := make(map[int64][]int64, 0) + var k int64 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1, 2, 3) + + a := []int64{7, 8, 9, 0} + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], a...) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k+1] = append(m[k+1], a...) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[-k] = append(m[-k], a...) + + // Exceptions + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(a, m[k]...) 
+ + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + sinkAppend, m[k] = !sinkAppend, append(m[k], 99) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(m[k+1], 100) +} + +func mapAppendAssignmentComplex128() { + m := make(map[complex128][]complex128, 0) + var k complex128 = 0 + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1) + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], 1, 2, 3) + + a := []complex128{7, 8, 9, 0} + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], a...) + + // Exceptions + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(a, m[k]...) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + sinkAppend, m[k] = !sinkAppend, append(m[k], 99) + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(m[k+1], 100) +} + +func mapAppendAssignmentString() { + m := make(map[string][]string, 0) + var k string = "key" + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], "1") + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], "1", "2", "3") + + a := []string{"7", "8", "9", "0"} + + // 386:-".*mapaccess" + // amd64:-".*mapaccess" + // arm:-".*mapaccess" + // arm64:-".*mapaccess" + m[k] = append(m[k], a...) + + // Exceptions + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(a, m[k]...) 
+ + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + sinkAppend, m[k] = !sinkAppend, append(m[k], "99") + + // 386:".*mapaccess" + // amd64:".*mapaccess" + // arm:".*mapaccess" + // arm64:".*mapaccess" + m[k] = append(m[k+"1"], "100") +} diff --git a/test/codegen/maps.go b/test/codegen/maps.go new file mode 100644 index 0000000..ea3a70d --- /dev/null +++ b/test/codegen/maps.go @@ -0,0 +1,154 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// This file contains code generation tests related to the handling of +// map types. + +// ------------------- // +// Access Const // +// ------------------- // + +// Direct use of constants in fast map access calls (Issue #19015). + +func AccessInt1(m map[int]int) int { + // amd64:"MOV[LQ]\t[$]5" + return m[5] +} + +func AccessInt2(m map[int]int) bool { + // amd64:"MOV[LQ]\t[$]5" + _, ok := m[5] + return ok +} + +func AccessString1(m map[string]int) int { + // amd64:`.*"abc"` + return m["abc"] +} + +func AccessString2(m map[string]int) bool { + // amd64:`.*"abc"` + _, ok := m["abc"] + return ok +} + +// ------------------- // +// String Conversion // +// ------------------- // + +func LookupStringConversionSimple(m map[string]int, bytes []byte) int { + // amd64:-`.*runtime\.slicebytetostring\(` + return m[string(bytes)] +} + +func LookupStringConversionStructLit(m map[struct{ string }]int, bytes []byte) int { + // amd64:-`.*runtime\.slicebytetostring\(` + return m[struct{ string }{string(bytes)}] +} + +func LookupStringConversionArrayLit(m map[[2]string]int, bytes []byte) int { + // amd64:-`.*runtime\.slicebytetostring\(` + return m[[2]string{string(bytes), string(bytes)}] +} + +func LookupStringConversionNestedLit(m map[[1]struct{ s [1]string }]int, bytes []byte) int { + // amd64:-`.*runtime\.slicebytetostring\(` + return 
m[[1]struct{ s [1]string }{struct{ s [1]string }{s: [1]string{string(bytes)}}}] +} + +func LookupStringConversionKeyedArrayLit(m map[[2]string]int, bytes []byte) int { + // amd64:-`.*runtime\.slicebytetostring\(` + return m[[2]string{0: string(bytes)}] +} + +// ------------------- // +// Map Clear // +// ------------------- // + +// Optimization of map clear idiom (Issue #20138). + +func MapClearReflexive(m map[int]int) { + // amd64:`.*runtime\.mapclear` + // amd64:-`.*runtime\.mapiterinit` + for k := range m { + delete(m, k) + } +} + +func MapClearIndirect(m map[int]int) { + s := struct{ m map[int]int }{m: m} + // amd64:`.*runtime\.mapclear` + // amd64:-`.*runtime\.mapiterinit` + for k := range s.m { + delete(s.m, k) + } +} + +func MapClearPointer(m map[*byte]int) { + // amd64:`.*runtime\.mapclear` + // amd64:-`.*runtime\.mapiterinit` + for k := range m { + delete(m, k) + } +} + +func MapClearNotReflexive(m map[float64]int) { + // amd64:`.*runtime\.mapiterinit` + // amd64:-`.*runtime\.mapclear` + for k := range m { + delete(m, k) + } +} + +func MapClearInterface(m map[interface{}]int) { + // amd64:`.*runtime\.mapiterinit` + // amd64:-`.*runtime\.mapclear` + for k := range m { + delete(m, k) + } +} + +func MapClearSideEffect(m map[int]int) int { + k := 0 + // amd64:`.*runtime\.mapiterinit` + // amd64:-`.*runtime\.mapclear` + for k = range m { + delete(m, k) + } + return k +} + +func MapLiteralSizing(x int) (map[int]int, map[int]int) { + // amd64:"MOVL\t[$]10," + m := map[int]int{ + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + } + // amd64:"MOVL\t[$]10," + n := map[int]int{ + 0: x, + 1: x, + 2: x, + 3: x, + 4: x, + 5: x, + 6: x, + 7: x, + 8: x, + 9: x, + } + return m, n +} diff --git a/test/codegen/math.go b/test/codegen/math.go new file mode 100644 index 0000000..7c76d26 --- /dev/null +++ b/test/codegen/math.go @@ -0,0 +1,261 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +import "math" + +var sink64 [8]float64 + +func approx(x float64) { + // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41" + // amd64:"ROUNDSD\t[$]2" + // s390x:"FIDBR\t[$]6" + // arm64:"FRINTPD" + // ppc64:"FRIP" + // ppc64le:"FRIP" + // wasm:"F64Ceil" + sink64[0] = math.Ceil(x) + + // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41" + // amd64:"ROUNDSD\t[$]1" + // s390x:"FIDBR\t[$]7" + // arm64:"FRINTMD" + // ppc64:"FRIM" + // ppc64le:"FRIM" + // wasm:"F64Floor" + sink64[1] = math.Floor(x) + + // s390x:"FIDBR\t[$]1" + // arm64:"FRINTAD" + // ppc64:"FRIN" + // ppc64le:"FRIN" + sink64[2] = math.Round(x) + + // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41" + // amd64:"ROUNDSD\t[$]3" + // s390x:"FIDBR\t[$]5" + // arm64:"FRINTZD" + // ppc64:"FRIZ" + // ppc64le:"FRIZ" + // wasm:"F64Trunc" + sink64[3] = math.Trunc(x) + + // amd64/v2:-".*x86HasSSE41" amd64/v3:-".*x86HasSSE41" + // amd64:"ROUNDSD\t[$]0" + // s390x:"FIDBR\t[$]4" + // arm64:"FRINTND" + // wasm:"F64Nearest" + sink64[4] = math.RoundToEven(x) +} + +func sqrt(x float64) float64 { + // amd64:"SQRTSD" + // 386/sse2:"SQRTSD" 386/softfloat:-"SQRTD" + // arm64:"FSQRTD" + // arm/7:"SQRTD" + // mips/hardfloat:"SQRTD" mips/softfloat:-"SQRTD" + // mips64/hardfloat:"SQRTD" mips64/softfloat:-"SQRTD" + // wasm:"F64Sqrt" + // ppc64le:"FSQRT" + // ppc64:"FSQRT" + return math.Sqrt(x) +} + +func sqrt32(x float32) float32 { + // amd64:"SQRTSS" + // 386/sse2:"SQRTSS" 386/softfloat:-"SQRTS" + // arm64:"FSQRTS" + // arm/7:"SQRTF" + // mips/hardfloat:"SQRTF" mips/softfloat:-"SQRTF" + // mips64/hardfloat:"SQRTF" mips64/softfloat:-"SQRTF" + // wasm:"F32Sqrt" + // ppc64le:"FSQRTS" + // ppc64:"FSQRTS" + return float32(math.Sqrt(float64(x))) +} + +// Check that it's using integer registers +func abs(x, y float64) { + // amd64:"BTRQ\t[$]63" + // arm64:"FABSD\t" + // s390x:"LPDFR\t",-"MOVD\t" (no 
integer load/store) + // ppc64:"FABS\t" + // ppc64le:"FABS\t" + // riscv64:"FABSD\t" + // wasm:"F64Abs" + // arm/6:"ABSD\t" + sink64[0] = math.Abs(x) + + // amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ) + // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store) + // ppc64:"FNABS\t" + // ppc64le:"FNABS\t" + sink64[1] = -math.Abs(y) +} + +// Check that it's using integer registers +func abs32(x float32) float32 { + // s390x:"LPDFR",-"LDEBR",-"LEDBR" (no float64 conversion) + return float32(math.Abs(float64(x))) +} + +// Check that it's using integer registers +func copysign(a, b, c float64) { + // amd64:"BTRQ\t[$]63","ANDQ","ORQ" + // s390x:"CPSDR",-"MOVD" (no integer load/store) + // ppc64:"FCPSGN" + // ppc64le:"FCPSGN" + // riscv64:"FSGNJD" + // wasm:"F64Copysign" + sink64[0] = math.Copysign(a, b) + + // amd64:"BTSQ\t[$]63" + // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store) + // ppc64:"FCPSGN" + // ppc64le:"FCPSGN" + // riscv64:"FSGNJD" + // arm64:"ORR", -"AND" + sink64[1] = math.Copysign(c, -1) + + // Like math.Copysign(c, -1), but with integer operations. Useful + // for platforms that have a copysign opcode to see if it's detected. 
+ // s390x:"LNDFR\t",-"MOVD\t" (no integer load/store) + sink64[2] = math.Float64frombits(math.Float64bits(a) | 1<<63) + + // amd64:"ANDQ","ORQ" + // s390x:"CPSDR\t",-"MOVD\t" (no integer load/store) + // ppc64:"FCPSGN" + // ppc64le:"FCPSGN" + // riscv64:"FSGNJD" + sink64[3] = math.Copysign(-1, c) +} + +func fma(x, y, z float64) float64 { + // amd64/v3:-".*x86HasFMA" + // amd64:"VFMADD231SD" + // arm/6:"FMULAD" + // arm64:"FMADDD" + // s390x:"FMADD" + // ppc64:"FMADD" + // ppc64le:"FMADD" + // riscv64:"FMADDD" + return math.FMA(x, y, z) +} + +func fms(x, y, z float64) float64 { + // riscv64:"FMSUBD" + return math.FMA(x, y, -z) +} + +func fnma(x, y, z float64) float64 { + // riscv64:"FNMADDD" + return math.FMA(-x, y, z) +} + +func fnms(x, y, z float64) float64 { + // riscv64:"FNMSUBD" + return math.FMA(x, -y, -z) +} + +func fromFloat64(f64 float64) uint64 { + // amd64:"MOVQ\tX.*, [^X].*" + // arm64:"FMOVD\tF.*, R.*" + // ppc64:"MFVSRD" + // ppc64le:"MFVSRD" + return math.Float64bits(f64+1) + 1 +} + +func fromFloat32(f32 float32) uint32 { + // amd64:"MOVL\tX.*, [^X].*" + // arm64:"FMOVS\tF.*, R.*" + return math.Float32bits(f32+1) + 1 +} + +func toFloat64(u64 uint64) float64 { + // amd64:"MOVQ\t[^X].*, X.*" + // arm64:"FMOVD\tR.*, F.*" + // ppc64:"MTVSRD" + // ppc64le:"MTVSRD" + return math.Float64frombits(u64+1) + 1 +} + +func toFloat32(u32 uint32) float32 { + // amd64:"MOVL\t[^X].*, X.*" + // arm64:"FMOVS\tR.*, F.*" + return math.Float32frombits(u32+1) + 1 +} + +// Test that comparisons with constants converted to float +// are evaluated at compile-time + +func constantCheck64() bool { + // amd64:"(MOVB\t[$]0)|(XORL\t[A-Z][A-Z0-9]+, [A-Z][A-Z0-9]+)",-"FCMP",-"MOVB\t[$]1" + // s390x:"MOV(B|BZ|D)\t[$]0,",-"FCMPU",-"MOV(B|BZ|D)\t[$]1," + return 0.5 == float64(uint32(1)) || 1.5 > float64(uint64(1<<63)) +} + +func constantCheck32() bool { + // amd64:"MOV(B|L)\t[$]1",-"FCMP",-"MOV(B|L)\t[$]0" + // s390x:"MOV(B|BZ|D)\t[$]1,",-"FCMPU",-"MOV(B|BZ|D)\t[$]0," + return 
float32(0.5) <= float32(int64(1)) && float32(1.5) >= float32(int32(-1<<31)) +} + +// Test that integer constants are converted to floating point constants +// at compile-time + +func constantConvert32(x float32) float32 { + // amd64:"MOVSS\t[$]f32.3f800000\\(SB\\)" + // s390x:"FMOVS\t[$]f32.3f800000\\(SB\\)" + // ppc64:"FMOVS\t[$]f32.3f800000\\(SB\\)" + // ppc64le:"FMOVS\t[$]f32.3f800000\\(SB\\)" + // arm64:"FMOVS\t[$]\\(1.0\\)" + if x > math.Float32frombits(0x3f800000) { + return -x + } + return x +} + +func constantConvertInt32(x uint32) uint32 { + // amd64:-"MOVSS" + // s390x:-"FMOVS" + // ppc64:-"FMOVS" + // ppc64le:-"FMOVS" + // arm64:-"FMOVS" + if x > math.Float32bits(1) { + return -x + } + return x +} + +func nanGenerate64() float64 { + // Test to make sure we don't generate a NaN while constant propagating. + // See issue 36400. + zero := 0.0 + // amd64:-"DIVSD" + inf := 1 / zero // +inf. We can constant propagate this one. + negone := -1.0 + + // amd64:"DIVSD" + z0 := zero / zero + // amd64:"MULSD" + z1 := zero * inf + // amd64:"SQRTSD" + z2 := math.Sqrt(negone) + return z0 + z1 + z2 +} + +func nanGenerate32() float32 { + zero := float32(0.0) + // amd64:-"DIVSS" + inf := 1 / zero // +inf. We can constant propagate this one. + + // amd64:"DIVSS" + z0 := zero / zero + // amd64:"MULSS" + z1 := zero * inf + return z0 + z1 +} diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go new file mode 100644 index 0000000..b506a37 --- /dev/null +++ b/test/codegen/mathbits.go @@ -0,0 +1,903 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +import "math/bits" + +// ----------------------- // +// bits.LeadingZeros // +// ----------------------- // + +func LeadingZeros(n uint) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3:"LZCNTQ", -"BSRQ" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"CNTLZD" + // ppc64:"CNTLZD" + return bits.LeadingZeros(n) +} + +func LeadingZeros64(n uint64) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3:"LZCNTQ", -"BSRQ" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"CNTLZD" + // ppc64:"CNTLZD" + return bits.LeadingZeros64(n) +} + +func LeadingZeros32(n uint32) int { + // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ" + // amd64/v3: "LZCNTL",- "BSRL" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZW" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"CNTLZW" + // ppc64:"CNTLZW" + return bits.LeadingZeros32(n) +} + +func LeadingZeros16(n uint16) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL",- "BSRL" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"CNTLZD" + // ppc64:"CNTLZD" + return bits.LeadingZeros16(n) +} + +func LeadingZeros8(n uint8) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL",- "BSRL" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"CNTLZD" + // ppc64:"CNTLZD" + return bits.LeadingZeros8(n) +} + +// --------------- // +// bits.Len* // +// --------------- // + +func Len(n uint) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3: "LZCNTQ" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"SUBC","CNTLZD" + // ppc64:"SUBC","CNTLZD" + return bits.Len(n) +} + +func Len64(n uint64) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3: "LZCNTQ" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"SUBC","CNTLZD" + // 
ppc64:"SUBC","CNTLZD" + return bits.Len64(n) +} + +func SubFromLen64(n uint64) int { + // ppc64le:"CNTLZD",-"SUBC" + // ppc64:"CNTLZD",-"SUBC" + return 64 - bits.Len64(n) +} + +func Len32(n uint32) int { + // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ" + // amd64/v3: "LZCNTL" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64: "CNTLZW" + // ppc64le: "CNTLZW" + return bits.Len32(n) +} + +func Len16(n uint16) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"SUBC","CNTLZD" + // ppc64:"SUBC","CNTLZD" + return bits.Len16(n) +} + +func Len8(n uint8) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL" + // s390x:"FLOGR" + // arm:"CLZ" arm64:"CLZ" + // mips:"CLZ" + // wasm:"I64Clz" + // ppc64le:"SUBC","CNTLZD" + // ppc64:"SUBC","CNTLZD" + return bits.Len8(n) +} + +// -------------------- // +// bits.OnesCount // +// -------------------- // + +// TODO(register args) Restore a m d 6 4 / v 1 :.*x86HasPOPCNT when only one ABI is tested. 
+func OnesCount(n uint) int { + // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" + // amd64:"POPCNTQ" + // arm64:"VCNT","VUADDLV" + // s390x:"POPCNT" + // ppc64:"POPCNTD" + // ppc64le:"POPCNTD" + // wasm:"I64Popcnt" + return bits.OnesCount(n) +} + +func OnesCount64(n uint64) int { + // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" + // amd64:"POPCNTQ" + // arm64:"VCNT","VUADDLV" + // s390x:"POPCNT" + // ppc64:"POPCNTD" + // ppc64le:"POPCNTD" + // wasm:"I64Popcnt" + return bits.OnesCount64(n) +} + +func OnesCount32(n uint32) int { + // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" + // amd64:"POPCNTL" + // arm64:"VCNT","VUADDLV" + // s390x:"POPCNT" + // ppc64:"POPCNTW" + // ppc64le:"POPCNTW" + // wasm:"I64Popcnt" + return bits.OnesCount32(n) +} + +func OnesCount16(n uint16) int { + // amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT" + // amd64:"POPCNTL" + // arm64:"VCNT","VUADDLV" + // s390x:"POPCNT" + // ppc64:"POPCNTW" + // ppc64le:"POPCNTW" + // wasm:"I64Popcnt" + return bits.OnesCount16(n) +} + +func OnesCount8(n uint8) int { + // s390x:"POPCNT" + // ppc64:"POPCNTB" + // ppc64le:"POPCNTB" + // wasm:"I64Popcnt" + return bits.OnesCount8(n) +} + +// ----------------------- // +// bits.ReverseBytes // +// ----------------------- // + +func ReverseBytes(n uint) uint { + // amd64:"BSWAPQ" + // s390x:"MOVDBR" + // arm64:"REV" + return bits.ReverseBytes(n) +} + +func ReverseBytes64(n uint64) uint64 { + // amd64:"BSWAPQ" + // s390x:"MOVDBR" + // arm64:"REV" + return bits.ReverseBytes64(n) +} + +func ReverseBytes32(n uint32) uint32 { + // amd64:"BSWAPL" + // s390x:"MOVWBR" + // arm64:"REVW" + return bits.ReverseBytes32(n) +} + +func ReverseBytes16(n uint16) uint16 { + // amd64:"ROLW" + // arm64:"REV16W",-"UBFX",-"ORR" + // arm/5:"SLL","SRL","ORR" + // arm/6:"REV16" + // arm/7:"REV16" + return bits.ReverseBytes16(n) +} + +// --------------------- // +// bits.RotateLeft // +// --------------------- // + +func RotateLeft64(n uint64) uint64 { + // 
amd64:"ROLQ" + // arm64:"ROR" + // ppc64:"ROTL" + // ppc64le:"ROTL" + // s390x:"RISBGZ\t[$]0, [$]63, [$]37, " + // wasm:"I64Rotl" + return bits.RotateLeft64(n, 37) +} + +func RotateLeft32(n uint32) uint32 { + // amd64:"ROLL" 386:"ROLL" + // arm:`MOVW\tR[0-9]+@>23` + // arm64:"RORW" + // ppc64:"ROTLW" + // ppc64le:"ROTLW" + // s390x:"RLL" + // wasm:"I32Rotl" + return bits.RotateLeft32(n, 9) +} + +func RotateLeft16(n uint16, s int) uint16 { + // amd64:"ROLW" 386:"ROLW" + // arm64:"RORW",-"CSEL" + return bits.RotateLeft16(n, s) +} + +func RotateLeft8(n uint8, s int) uint8 { + // amd64:"ROLB" 386:"ROLB" + // arm64:"LSL","LSR",-"CSEL" + return bits.RotateLeft8(n, s) +} + +func RotateLeftVariable(n uint, m int) uint { + // amd64:"ROLQ" + // arm64:"ROR" + // ppc64:"ROTL" + // ppc64le:"ROTL" + // s390x:"RLLG" + // wasm:"I64Rotl" + return bits.RotateLeft(n, m) +} + +func RotateLeftVariable64(n uint64, m int) uint64 { + // amd64:"ROLQ" + // arm64:"ROR" + // ppc64:"ROTL" + // ppc64le:"ROTL" + // s390x:"RLLG" + // wasm:"I64Rotl" + return bits.RotateLeft64(n, m) +} + +func RotateLeftVariable32(n uint32, m int) uint32 { + // arm:`MOVW\tR[0-9]+@>R[0-9]+` + // amd64:"ROLL" + // arm64:"RORW" + // ppc64:"ROTLW" + // ppc64le:"ROTLW" + // s390x:"RLL" + // wasm:"I32Rotl" + return bits.RotateLeft32(n, m) +} + +// ------------------------ // +// bits.TrailingZeros // +// ------------------------ // + +func TrailingZeros(n uint) int { + // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // amd64/v3:"TZCNTQ" + // arm:"CLZ" + // arm64:"RBIT","CLZ" + // s390x:"FLOGR" + // ppc64/power8:"ANDN","POPCNTD" + // ppc64le/power8:"ANDN","POPCNTD" + // ppc64/power9: "CNTTZD" + // ppc64le/power9: "CNTTZD" + // wasm:"I64Ctz" + return bits.TrailingZeros(n) +} + +func TrailingZeros64(n uint64) int { + // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // amd64/v3:"TZCNTQ" + // arm64:"RBIT","CLZ" + // s390x:"FLOGR" + // ppc64/power8:"ANDN","POPCNTD" + // ppc64le/power8:"ANDN","POPCNTD" + // 
ppc64/power9: "CNTTZD" + // ppc64le/power9: "CNTTZD" + // wasm:"I64Ctz" + return bits.TrailingZeros64(n) +} + +func TrailingZeros64Subtract(n uint64) int { + // ppc64le/power8:"NEG","SUBC","ANDN","POPCNTD" + // ppc64le/power9:"SUBC","CNTTZD" + return bits.TrailingZeros64(1 - n) +} + +func TrailingZeros32(n uint32) int { + // amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ" + // amd64/v3:"TZCNTL" + // arm:"CLZ" + // arm64:"RBITW","CLZW" + // s390x:"FLOGR","MOVWZ" + // ppc64/power8:"ANDN","POPCNTW" + // ppc64le/power8:"ANDN","POPCNTW" + // ppc64/power9: "CNTTZW" + // ppc64le/power9: "CNTTZW" + // wasm:"I64Ctz" + return bits.TrailingZeros32(n) +} + +func TrailingZeros16(n uint16) int { + // amd64:"BSFL","BTSL\\t\\$16" + // 386:"BSFL\t" + // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR" + // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" + // s390x:"FLOGR","OR\t\\$65536" + // ppc64/power8:"POPCNTD","OR\\t\\$65536" + // ppc64le/power8:"POPCNTD","OR\\t\\$65536" + // ppc64/power9:"CNTTZD","OR\\t\\$65536" + // ppc64le/power9:"CNTTZD","OR\\t\\$65536" + // wasm:"I64Ctz" + return bits.TrailingZeros16(n) +} + +func TrailingZeros8(n uint8) int { + // amd64:"BSFL","BTSL\\t\\$8" + // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" + // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" + // s390x:"FLOGR","OR\t\\$256" + // wasm:"I64Ctz" + return bits.TrailingZeros8(n) +} + +// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero. 
+ +func IterateBits(n uint) int { + i := 0 + for n != 0 { + // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ" + // amd64/v3:"TZCNTQ" + i += bits.TrailingZeros(n) + n &= n - 1 + } + return i +} + +func IterateBits64(n uint64) int { + i := 0 + for n != 0 { + // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ" + // amd64/v3:"TZCNTQ" + i += bits.TrailingZeros64(n) + n &= n - 1 + } + return i +} + +func IterateBits32(n uint32) int { + i := 0 + for n != 0 { + // amd64/v1,amd64/v2:"BSFL",-"BTSQ" + // amd64/v3:"TZCNTL" + i += bits.TrailingZeros32(n) + n &= n - 1 + } + return i +} + +func IterateBits16(n uint16) int { + i := 0 + for n != 0 { + // amd64/v1,amd64/v2:"BSFL",-"BTSL" + // amd64/v3:"TZCNTL" + // arm64:"RBITW","CLZW",-"ORR" + i += bits.TrailingZeros16(n) + n &= n - 1 + } + return i +} + +func IterateBits8(n uint8) int { + i := 0 + for n != 0 { + // amd64/v1,amd64/v2:"BSFL",-"BTSL" + // amd64/v3:"TZCNTL" + // arm64:"RBITW","CLZW",-"ORR" + i += bits.TrailingZeros8(n) + n &= n - 1 + } + return i +} + +// --------------- // +// bits.Add* // +// --------------- // + +func Add(x, y, ci uint) (r, co uint) { + // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" + // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // ppc64: "ADDC", "ADDE", "ADDZE" + // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" + return bits.Add(x, y, ci) +} + +func AddC(x, ci uint) (r, co uint) { + // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" + // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // loong64: "ADDV", "SGTU" + // ppc64: "ADDC", "ADDE", "ADDZE" + // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" + return bits.Add(x, 7, ci) +} + +func AddZ(x, y uint) (r, co uint) { + // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" + // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" + // loong64: "ADDV", "SGTU" + // ppc64: "ADDC", -"ADDE", "ADDZE" + // ppc64le: "ADDC", -"ADDE", "ADDZE" + // s390x:"ADDC",-"ADDC\t[$]-1," + // riscv64: "ADD","SLTU" + return bits.Add(x, y, 0) +} + 
+func AddR(x, y, ci uint) uint { + // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" + // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" + // loong64: "ADDV", -"SGTU" + // ppc64: "ADDC", "ADDE", -"ADDZE" + // ppc64le: "ADDC", "ADDE", -"ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD",-"SLTU" + r, _ := bits.Add(x, y, ci) + return r +} + +func AddM(p, q, r *[3]uint) { + var c uint + r[0], c = bits.Add(p[0], q[0], c) + // arm64:"ADCS",-"ADD\t",-"CMP" + // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" + // s390x:"ADDE",-"ADDC\t[$]-1," + r[1], c = bits.Add(p[1], q[1], c) + r[2], c = bits.Add(p[2], q[2], c) +} + +func Add64(x, y, ci uint64) (r, co uint64) { + // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" + // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // loong64: "ADDV", "SGTU" + // ppc64: "ADDC", "ADDE", "ADDZE" + // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" + return bits.Add64(x, y, ci) +} + +func Add64C(x, ci uint64) (r, co uint64) { + // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" + // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // loong64: "ADDV", "SGTU" + // ppc64: "ADDC", "ADDE", "ADDZE" + // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD","SLTU" + return bits.Add64(x, 7, ci) +} + +func Add64Z(x, y uint64) (r, co uint64) { + // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" + // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" + // loong64: "ADDV", "SGTU" + // ppc64: "ADDC", -"ADDE", "ADDZE" + // ppc64le: "ADDC", -"ADDE", "ADDZE" + // s390x:"ADDC",-"ADDC\t[$]-1," + // riscv64: "ADD","SLTU" + return bits.Add64(x, y, 0) +} + +func Add64R(x, y, ci uint64) uint64 { + // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" + // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" + // loong64: "ADDV", -"SGTU" + // ppc64: "ADDC", "ADDE", -"ADDZE" + // ppc64le: "ADDC", "ADDE", -"ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," + // riscv64: "ADD",-"SLTU" + r, _ := bits.Add64(x, y, ci) + return r +} +func Add64M(p, q, r *[3]uint64) { + var c uint64 + r[0], c = 
bits.Add64(p[0], q[0], c) + // arm64:"ADCS",-"ADD\t",-"CMP" + // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" + // ppc64: -"ADDC", "ADDE", -"ADDZE" + // ppc64le: -"ADDC", "ADDE", -"ADDZE" + // s390x:"ADDE",-"ADDC\t[$]-1," + r[1], c = bits.Add64(p[1], q[1], c) + r[2], c = bits.Add64(p[2], q[2], c) +} + +func Add64MSaveC(p, q, r, c *[2]uint64) { + // ppc64: "ADDC\tR", "ADDZE" + // ppc64le: "ADDC\tR", "ADDZE" + r[0], c[0] = bits.Add64(p[0], q[0], 0) + // ppc64: "ADDC\t[$]-1", "ADDE", "ADDZE" + // ppc64le: "ADDC\t[$]-1", "ADDE", "ADDZE" + r[1], c[1] = bits.Add64(p[1], q[1], c[0]) +} + +func Add64PanicOnOverflowEQ(a, b uint64) uint64 { + r, c := bits.Add64(a, b, 0) + // s390x:"BRC\t[$]3,",-"ADDE" + if c == 1 { + panic("overflow") + } + return r +} + +func Add64PanicOnOverflowNE(a, b uint64) uint64 { + r, c := bits.Add64(a, b, 0) + // s390x:"BRC\t[$]3,",-"ADDE" + if c != 0 { + panic("overflow") + } + return r +} + +func Add64PanicOnOverflowGT(a, b uint64) uint64 { + r, c := bits.Add64(a, b, 0) + // s390x:"BRC\t[$]3,",-"ADDE" + if c > 0 { + panic("overflow") + } + return r +} + +func Add64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 { + var r [2]uint64 + var c uint64 + r[0], c = bits.Add64(a[0], b[0], c) + r[1], c = bits.Add64(a[1], b[1], c) + // s390x:"BRC\t[$]3," + if c == 1 { + panic("overflow") + } + return r +} + +func Add64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 { + var r [2]uint64 + var c uint64 + r[0], c = bits.Add64(a[0], b[0], c) + r[1], c = bits.Add64(a[1], b[1], c) + // s390x:"BRC\t[$]3," + if c != 0 { + panic("overflow") + } + return r +} + +func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 { + var r [2]uint64 + var c uint64 + r[0], c = bits.Add64(a[0], b[0], c) + r[1], c = bits.Add64(a[1], b[1], c) + // s390x:"BRC\t[$]3," + if c > 0 { + panic("overflow") + } + return r +} + +// Verify independent carry chain operations are scheduled efficiently +// and do not cause unnecessary save/restore of the CA bit. 
+// +// This is an example of why CarryChainTail priority must be lower +// (earlier in the block) than Memory. f[0]=f1 could be scheduled +// after the first two lower 64 bit limb adds, but before either +// high 64 bit limbs are added. +// +// This is what happened on PPC64 when compiling +// crypto/internal/edwards25519/field.feMulGeneric. +func Add64MultipleChains(a, b, c, d [2]uint64) { + var cx, d1, d2 uint64 + a1, a2 := a[0], a[1] + b1, b2 := b[0], b[1] + c1, c2 := c[0], c[1] + + // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + d1, cx = bits.Add64(a1, b1, 0) + // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER" + // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER" + d2, _ = bits.Add64(a2, b2, cx) + + // ppc64: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + // ppc64le: "ADDC\tR\\d+,", -"ADDE", -"MOVD\tXER" + d1, cx = bits.Add64(c1, d1, 0) + // ppc64: "ADDE", -"ADDC", -"MOVD\t.*, XER" + // ppc64le: "ADDE", -"ADDC", -"MOVD\t.*, XER" + d2, _ = bits.Add64(c2, d2, cx) + d[0] = d1 + d[1] = d2 +} + +// --------------- // +// bits.Sub* // +// --------------- // + +func Sub(x, y, ci uint) (r, co uint) { + // amd64:"NEGL","SBBQ","NEGQ" + // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // loong64:"SUBV","SGTU" + // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" + // s390x:"SUBE" + // riscv64: "SUB","SLTU" + return bits.Sub(x, y, ci) +} + +func SubC(x, ci uint) (r, co uint) { + // amd64:"NEGL","SBBQ","NEGQ" + // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // loong64:"SUBV","SGTU" + // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" + // s390x:"SUBE" + // riscv64: "SUB","SLTU" + return bits.Sub(x, 7, ci) +} + +func SubZ(x, y uint) (r, co uint) { + // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL" + // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP" + // loong64:"SUBV","SGTU" + // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC", -"SUBE", "SUBZE", 
"NEG" + // s390x:"SUBC" + // riscv64: "SUB","SLTU" + return bits.Sub(x, y, 0) +} + +func SubR(x, y, ci uint) uint { + // amd64:"NEGL","SBBQ",-"NEGQ" + // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP" + // loong64:"SUBV",-"SGTU" + // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG" + // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG" + // s390x:"SUBE" + // riscv64: "SUB",-"SLTU" + r, _ := bits.Sub(x, y, ci) + return r +} +func SubM(p, q, r *[3]uint) { + var c uint + r[0], c = bits.Sub(p[0], q[0], c) + // amd64:"SBBQ",-"NEGL",-"NEGQ" + // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP" + // ppc64:-"SUBC", "SUBE", -"SUBZE", -"NEG" + // ppc64le:-"SUBC", "SUBE", -"SUBZE", -"NEG" + // s390x:"SUBE" + r[1], c = bits.Sub(p[1], q[1], c) + r[2], c = bits.Sub(p[2], q[2], c) +} + +func Sub64(x, y, ci uint64) (r, co uint64) { + // amd64:"NEGL","SBBQ","NEGQ" + // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // loong64:"SUBV","SGTU" + // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" + // s390x:"SUBE" + // riscv64: "SUB","SLTU" + return bits.Sub64(x, y, ci) +} + +func Sub64C(x, ci uint64) (r, co uint64) { + // amd64:"NEGL","SBBQ","NEGQ" + // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // loong64:"SUBV","SGTU" + // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" + // s390x:"SUBE" + // riscv64: "SUB","SLTU" + return bits.Sub64(x, 7, ci) +} + +func Sub64Z(x, y uint64) (r, co uint64) { + // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL" + // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP" + // loong64:"SUBV","SGTU" + // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG" + // s390x:"SUBC" + // riscv64: "SUB","SLTU" + return bits.Sub64(x, y, 0) +} + +func Sub64R(x, y, ci uint64) uint64 { + // amd64:"NEGL","SBBQ",-"NEGQ" + // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP" + // loong64:"SUBV",-"SGTU" + // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG" + // 
ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG" + // s390x:"SUBE" + // riscv64: "SUB",-"SLTU" + r, _ := bits.Sub64(x, y, ci) + return r +} +func Sub64M(p, q, r *[3]uint64) { + var c uint64 + r[0], c = bits.Sub64(p[0], q[0], c) + // amd64:"SBBQ",-"NEGL",-"NEGQ" + // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" + r[1], c = bits.Sub64(p[1], q[1], c) + r[2], c = bits.Sub64(p[2], q[2], c) +} + +func Sub64MSaveC(p, q, r, c *[2]uint64) { + // ppc64:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG" + // ppc64le:"SUBC\tR\\d+, R\\d+,", "SUBZE", "NEG" + r[0], c[0] = bits.Sub64(p[0], q[0], 0) + // ppc64:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG" + // ppc64le:"SUBC\tR\\d+, [$]0,", "SUBE", "SUBZE", "NEG" + r[1], c[1] = bits.Sub64(p[1], q[1], c[0]) +} + +func Sub64PanicOnOverflowEQ(a, b uint64) uint64 { + r, b := bits.Sub64(a, b, 0) + // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE" + if b == 1 { + panic("overflow") + } + return r +} + +func Sub64PanicOnOverflowNE(a, b uint64) uint64 { + r, b := bits.Sub64(a, b, 0) + // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE" + if b != 0 { + panic("overflow") + } + return r +} + +func Sub64PanicOnOverflowGT(a, b uint64) uint64 { + r, b := bits.Sub64(a, b, 0) + // s390x:"BRC\t[$]12,",-"ADDE",-"SUBE" + if b > 0 { + panic("overflow") + } + return r +} + +func Sub64MPanicOnOverflowEQ(a, b [2]uint64) [2]uint64 { + var r [2]uint64 + var c uint64 + r[0], c = bits.Sub64(a[0], b[0], c) + r[1], c = bits.Sub64(a[1], b[1], c) + // s390x:"BRC\t[$]12," + if c == 1 { + panic("overflow") + } + return r +} + +func Sub64MPanicOnOverflowNE(a, b [2]uint64) [2]uint64 { + var r [2]uint64 + var c uint64 + r[0], c = bits.Sub64(a[0], b[0], c) + r[1], c = bits.Sub64(a[1], b[1], c) + // s390x:"BRC\t[$]12," + if c != 0 { + panic("overflow") + } + return r +} + +func Sub64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 { + var r [2]uint64 + var c uint64 + r[0], c = bits.Sub64(a[0], b[0], c) + r[1], c = bits.Sub64(a[1], b[1], c) + // s390x:"BRC\t[$]12," + if c > 0 { + 
panic("overflow") + } + return r +} + +// --------------- // +// bits.Mul* // +// --------------- // + +func Mul(x, y uint) (hi, lo uint) { + // amd64:"MULQ" + // arm64:"UMULH","MUL" + // ppc64:"MULHDU","MULLD" + // ppc64le:"MULHDU","MULLD" + // s390x:"MLGR" + // mips64: "MULVU" + return bits.Mul(x, y) +} + +func Mul64(x, y uint64) (hi, lo uint64) { + // amd64:"MULQ" + // arm64:"UMULH","MUL" + // ppc64:"MULHDU","MULLD" + // ppc64le:"MULHDU","MULLD" + // s390x:"MLGR" + // mips64: "MULVU" + // riscv64:"MULHU","MUL" + return bits.Mul64(x, y) +} + +func Mul64HiOnly(x, y uint64) uint64 { + // arm64:"UMULH",-"MUL" + // riscv64:"MULHU",-"MUL\t" + hi, _ := bits.Mul64(x, y) + return hi +} + +func Mul64LoOnly(x, y uint64) uint64 { + // arm64:"MUL",-"UMULH" + // riscv64:"MUL\t",-"MULHU" + _, lo := bits.Mul64(x, y) + return lo +} + +// --------------- // +// bits.Div* // +// --------------- // + +func Div(hi, lo, x uint) (q, r uint) { + // amd64:"DIVQ" + return bits.Div(hi, lo, x) +} + +func Div32(hi, lo, x uint32) (q, r uint32) { + // arm64:"ORR","UDIV","MSUB",-"UREM" + return bits.Div32(hi, lo, x) +} + +func Div64(hi, lo, x uint64) (q, r uint64) { + // amd64:"DIVQ" + return bits.Div64(hi, lo, x) +} + +func Div64degenerate(x uint64) (q, r uint64) { + // amd64:-"DIVQ" + return bits.Div64(0, x, 5) +} diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go new file mode 100644 index 0000000..8143b6b --- /dev/null +++ b/test/codegen/memcombine.go @@ -0,0 +1,721 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +import ( + "encoding/binary" + "runtime" +) + +// ------------- // +// Loading // +// ------------- // + +func load_le64(b []byte) uint64 { + // amd64:`MOVQ\s\(.*\),`,-`MOV[BWL]\t[^$]`,-`OR` + // s390x:`MOVDBR\s\(.*\),` + // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]` + // ppc64le:`MOVD\s`,-`MOV[BHW]Z` + return binary.LittleEndian.Uint64(b) +} + +func load_le64_idx(b []byte, idx int) uint64 { + // amd64:`MOVQ\s\(.*\)\(.*\*1\),`,-`MOV[BWL]\t[^$]`,-`OR` + // s390x:`MOVDBR\s\(.*\)\(.*\*1\),` + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]` + // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s` + return binary.LittleEndian.Uint64(b[idx:]) +} + +func load_le32(b []byte) uint32 { + // amd64:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR` + // 386:`MOVL\s\(.*\),`,-`MOV[BW]`,-`OR` + // s390x:`MOVWBR\s\(.*\),` + // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]` + // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s` + return binary.LittleEndian.Uint32(b) +} + +func load_le32_idx(b []byte, idx int) uint32 { + // amd64:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR` + // 386:`MOVL\s\(.*\)\(.*\*1\),`,-`MOV[BW]`,-`OR` + // s390x:`MOVWBR\s\(.*\)\(.*\*1\),` + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]` + // ppc64le:`MOVWZ\s`,-`MOV[BH]Z\s` + return binary.LittleEndian.Uint32(b[idx:]) +} + +func load_le16(b []byte) uint16 { + // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR` + // ppc64le:`MOVHZ\s`,-`MOVBZ` + // arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB` + // s390x:`MOVHBR\s\(.*\),` + return binary.LittleEndian.Uint16(b) +} + +func load_le16_idx(b []byte, idx int) uint16 { + // amd64:`MOVWLZX\s\(.*\),`,-`MOVB`,-`OR` + // ppc64le:`MOVHZ\s`,-`MOVBZ` + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` + // s390x:`MOVHBR\s\(.*\)\(.*\*1\),` + return binary.LittleEndian.Uint16(b[idx:]) +} + +func load_be64(b []byte) uint64 { + // amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR` + // amd64/v3:`MOVBEQ` + // s390x:`MOVD\s\(.*\),` + // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W` + // ppc64le:`MOVDBR`,-`MOV[BHW]Z` + 
return binary.BigEndian.Uint64(b) +} + +func load_be64_idx(b []byte, idx int) uint64 { + // amd64/v1,amd64/v2:`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR` + // amd64/v3: `MOVBEQ\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + // s390x:`MOVD\s\(.*\)\(.*\*1\),` + // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W` + // ppc64le:`MOVDBR`,-`MOV[BHW]Z` + return binary.BigEndian.Uint64(b[idx:]) +} + +func load_be32(b []byte) uint32 { + // amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR` + // amd64/v3: `MOVBEL` + // s390x:`MOVWZ\s\(.*\),` + // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W` + // ppc64le:`MOVWBR`,-`MOV[BH]Z` + return binary.BigEndian.Uint32(b) +} + +func load_be32_idx(b []byte, idx int) uint32 { + // amd64/v1,amd64/v2:`BSWAPL`,-`MOV[BW]`,-`OR` + // amd64/v3: `MOVBEL\t\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + // s390x:`MOVWZ\s\(.*\)\(.*\*1\),` + // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W` + // ppc64le:`MOVWBR`,-`MOV[BH]Z` + return binary.BigEndian.Uint32(b[idx:]) +} + +func load_be16(b []byte) uint16 { + // amd64:`ROLW\s\$8`,-`MOVB`,-`OR` + // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB` + // ppc64le:`MOVHBR` + // s390x:`MOVHZ\s\(.*\),`,-`OR`,-`ORW`,-`SLD`,-`SLW` + return binary.BigEndian.Uint16(b) +} + +func load_be16_idx(b []byte, idx int) uint16 { + // amd64:`ROLW\s\$8`,-`MOVB`,-`OR` + // arm64:`REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` + // ppc64le:`MOVHBR` + // s390x:`MOVHZ\s\(.*\)\(.*\*1\),`,-`OR`,-`ORW`,-`SLD`,-`SLW` + return binary.BigEndian.Uint16(b[idx:]) +} + +func load_le_byte2_uint16(s []byte) uint16 { + // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ` + return uint16(s[0]) | uint16(s[1])<<8 +} + +func load_le_byte2_uint16_inv(s []byte) uint16 { + // arm64:`MOVHU\t\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // 
amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // ppc64le:`MOVHZ\t\(R[0-9]+\)`,-`MOVBZ` + return uint16(s[1])<<8 | uint16(s[0]) +} + +func load_le_byte4_uint32(s []byte) uint32 { + // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` + // 386:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + // ppc64le:`MOVWZ\t\(R[0-9]+\)`,-`MOV[BH]Z` + return uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24 +} + +func load_le_byte4_uint32_inv(s []byte) uint32 { + // arm64:`MOVWU\t\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` + return uint32(s[3])<<24 | uint32(s[2])<<16 | uint32(s[1])<<8 | uint32(s[0]) +} + +func load_le_byte8_uint64(s []byte) uint64 { + // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` + // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR` + // ppc64le:`MOVD\t\(R[0-9]+\)`,-`MOV[BHW]Z` + return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 | uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56 +} + +func load_le_byte8_uint64_inv(s []byte) uint64 { + // arm64:`MOVD\t\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[7])<<56 | uint64(s[6])<<48 | uint64(s[5])<<40 | uint64(s[4])<<32 | uint64(s[3])<<24 | uint64(s[2])<<16 | uint64(s[1])<<8 | uint64(s[0]) +} + +func load_be_byte2_uint16(s []byte) uint16 { + // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR` + // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ` + return uint16(s[0])<<8 | uint16(s[1]) +} + +func load_be_byte2_uint16_inv(s []byte) uint16 { + // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR` + // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ` + return uint16(s[1]) | uint16(s[0])<<8 +} + +func load_be_byte4_uint32(s []byte) uint32 { + // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` + return uint32(s[0])<<24 | uint32(s[1])<<16 | uint32(s[2])<<8 | uint32(s[3]) +} + +func load_be_byte4_uint32_inv(s []byte) uint32 { + // 
arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` + // amd64/v1,amd64/v2:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR` + // amd64/v3: `MOVBEL` + return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24 +} + +func load_be_byte8_uint64(s []byte) uint64 { + // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` + // ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z` + return uint64(s[0])<<56 | uint64(s[1])<<48 | uint64(s[2])<<40 | uint64(s[3])<<32 | uint64(s[4])<<24 | uint64(s[5])<<16 | uint64(s[6])<<8 | uint64(s[7]) +} + +func load_be_byte8_uint64_inv(s []byte) uint64 { + // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` + // amd64/v1,amd64/v2:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR` + // amd64/v3: `MOVBEQ` + // ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z` + return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56 +} + +func load_le_byte2_uint16_idx(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` + return uint16(s[idx]) | uint16(s[idx+1])<<8 +} + +func load_le_byte2_uint16_idx_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOVB` + // 386:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`ORL`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` + return uint16(s[idx+1])<<8 | uint16(s[idx]) +} + +func load_le_byte4_uint32_idx(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` + // amd64:`MOVL\s\([A-Z]+\)\([A-Z]+`,-`MOV[BW]`,-`OR` + return uint32(s[idx]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24 +} + +func load_le_byte4_uint32_idx_inv(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BH]` + return uint32(s[idx+3])<<24 | 
uint32(s[idx+2])<<16 | uint32(s[idx+1])<<8 | uint32(s[idx]) +} + +func load_le_byte8_uint64_idx(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` + // amd64:`MOVQ\s\([A-Z]+\)\([A-Z]+`,-`MOV[BWL]`,-`OR` + return uint64(s[idx]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 | uint64(s[idx+4])<<32 | uint64(s[idx+5])<<40 | uint64(s[idx+6])<<48 | uint64(s[idx+7])<<56 +} + +func load_le_byte8_uint64_idx_inv(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[idx+7])<<56 | uint64(s[idx+6])<<48 | uint64(s[idx+5])<<40 | uint64(s[idx+4])<<32 | uint64(s[idx+3])<<24 | uint64(s[idx+2])<<16 | uint64(s[idx+1])<<8 | uint64(s[idx]) +} + +func load_be_byte2_uint16_idx(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` + return uint16(s[idx])<<8 | uint16(s[idx+1]) +} + +func load_be_byte2_uint16_idx_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` + // amd64:`MOVWLZX\s\([A-Z]+\)\([A-Z]+`,-`MOVB`,-`OR` + return uint16(s[idx+1]) | uint16(s[idx])<<8 +} + +func load_be_byte4_uint32_idx(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W` + return uint32(s[idx])<<24 | uint32(s[idx+1])<<16 | uint32(s[idx+2])<<8 | uint32(s[idx+3]) +} + +func load_be_byte8_uint64_idx(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W` + return uint64(s[idx])<<56 | uint64(s[idx+1])<<48 | uint64(s[idx+2])<<40 | uint64(s[idx+3])<<32 | uint64(s[idx+4])<<24 | uint64(s[idx+5])<<16 | uint64(s[idx+6])<<8 | uint64(s[idx+7]) +} + +func load_le_byte2_uint16_idx2(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB` + return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8 +} + +func load_le_byte2_uint16_idx2_inv(s 
[]byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB` + return uint16(s[(idx<<1)+1])<<8 | uint16(s[idx<<1]) +} + +func load_le_byte4_uint32_idx4(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]` + return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24 +} + +func load_le_byte4_uint32_idx4_inv(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]` + return uint32(s[(idx<<2)+3])<<24 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+1])<<8 | uint32(s[idx<<2]) +} + +func load_le_byte8_uint64_idx8(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56 +} + +func load_le_byte8_uint64_idx8_inv(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]` + return uint64(s[(idx<<3)+7])<<56 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+1])<<8 | uint64(s[idx<<3]) +} + +func load_be_byte2_uint16_idx2(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB` + return uint16(s[idx<<1])<<8 | uint16(s[(idx<<1)+1]) +} + +func load_be_byte2_uint16_idx2_inv(s []byte, idx int) uint16 { + // arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,`REV16W`,-`ORR`,-`MOVB` + return uint16(s[(idx<<1)+1]) | uint16(s[idx<<1])<<8 +} + +func load_be_byte4_uint32_idx4(s []byte, idx int) uint32 { + // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,`REVW`,-`ORR`,-`MOV[BH]`,-`REV16W` + return uint32(s[idx<<2])<<24 | uint32(s[(idx<<2)+1])<<16 | uint32(s[(idx<<2)+2])<<8 | uint32(s[(idx<<2)+3]) +} + +func 
load_be_byte8_uint64_idx8(s []byte, idx int) uint64 { + // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,`REV`,-`ORR`,-`MOV[BHW]`,-`REVW`,-`REV16W` + return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7]) +} + +// Check load combining across function calls. + +func fcall_byte(a [2]byte) [2]byte { + return fcall_byte(fcall_byte(a)) // amd64:`MOVW` +} + +func fcall_uint16(a [2]uint16) [2]uint16 { + return fcall_uint16(fcall_uint16(a)) // amd64:`MOVL` +} + +func fcall_uint32(a [2]uint32) [2]uint32 { + return fcall_uint32(fcall_uint32(a)) // amd64:`MOVQ` +} + +// We want to merge load+op in the first function, but not in the +// second. See Issue 19595. +func load_op_merge(p, q *int) { + x := *p // amd64:`ADDQ\t\(` + *q += x // The combined nilcheck and load would normally have this line number, but we want that combined operation to have the line number of the nil check instead (see #33724). +} +func load_op_no_merge(p, q *int) { + x := *p + for i := 0; i < 10; i++ { + *q += x // amd64:`ADDQ\t[A-Z]` + } +} + +// Make sure offsets are folded into loads and stores. +func offsets_fold(_, a [20]byte) (b [20]byte) { + // arm64:`MOVD\tcommand-line-arguments\.a\+[0-9]+\(FP\), R[0-9]+`,`MOVD\tR[0-9]+, command-line-arguments\.b\+[0-9]+\(FP\)` + b = a + return +} + +// Make sure we don't put pointers in SSE registers across safe +// points. 
+ +func safe_point(p, q *[2]*int) { + a, b := p[0], p[1] // amd64:-`MOVUPS` + runtime.GC() + q[0], q[1] = a, b // amd64:-`MOVUPS` +} + +// ------------- // +// Storing // +// ------------- // + +func store_le64(b []byte, x uint64) { + // amd64:`MOVQ\s.*\(.*\)$`,-`SHR.` + // arm64:`MOVD`,-`MOV[WBH]` + // ppc64le:`MOVD\s`,-`MOV[BHW]\s` + // s390x:`MOVDBR\s.*\(.*\)$` + binary.LittleEndian.PutUint64(b, x) +} + +func store_le64_idx(b []byte, x uint64, idx int) { + // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.` + // arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]` + // ppc64le:`MOVD\s`,-`MOV[BHW]\s` + // s390x:`MOVDBR\s.*\(.*\)\(.*\*1\)$` + binary.LittleEndian.PutUint64(b[idx:], x) +} + +func store_le64_idx2(dst []byte, d, length, offset int) []byte { + a := dst[d : d+length] + b := dst[d-offset:] + // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.` + binary.LittleEndian.PutUint64(a, binary.LittleEndian.Uint64(b)) + return dst +} + +func store_le64_idx_const(b []byte, idx int) { + // amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$` + binary.LittleEndian.PutUint64(b[idx:], 123) +} + +func store_le64_load(b []byte, x *[8]byte) { + _ = b[8] + // amd64:-`MOV[BWL]` + // arm64:-`MOV[BWH]` + // ppc64le:-`MOV[BWH]` + // s390x:-`MOVB`,-`MOV[WH]BR` + binary.LittleEndian.PutUint64(b, binary.LittleEndian.Uint64(x[:])) +} + +func store_le32(b []byte, x uint32) { + // amd64:`MOVL\s` + // arm64:`MOVW`,-`MOV[BH]` + // ppc64le:`MOVW\s` + // s390x:`MOVWBR\s.*\(.*\)$` + binary.LittleEndian.PutUint32(b, x) +} + +func store_le32_idx(b []byte, x uint32, idx int) { + // amd64:`MOVL\s` + // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]` + // ppc64le:`MOVW\s` + // s390x:`MOVWBR\s.*\(.*\)\(.*\*1\)$` + binary.LittleEndian.PutUint32(b[idx:], x) +} + +func store_le32_idx_const(b []byte, idx int) { + // amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$` + binary.LittleEndian.PutUint32(b[idx:], 123) +} + +func store_le16(b []byte, x uint16) { + // amd64:`MOVW\s` + // arm64:`MOVH`,-`MOVB` + // ppc64le:`MOVH\s` + // 
s390x:`MOVHBR\s.*\(.*\)$` + binary.LittleEndian.PutUint16(b, x) +} + +func store_le16_idx(b []byte, x uint16, idx int) { + // amd64:`MOVW\s` + // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` + // ppc64le:`MOVH\s` + // s390x:`MOVHBR\s.*\(.*\)\(.*\*1\)$` + binary.LittleEndian.PutUint16(b[idx:], x) +} + +func store_le16_idx_const(b []byte, idx int) { + // amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$` + binary.LittleEndian.PutUint16(b[idx:], 123) +} + +func store_be64(b []byte, x uint64) { + // amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.` + // amd64/v3: `MOVBEQ` + // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W` + // ppc64le:`MOVDBR` + // s390x:`MOVD\s.*\(.*\)$`,-`SRW\s`,-`SRD\s` + binary.BigEndian.PutUint64(b, x) +} + +func store_be64_idx(b []byte, x uint64, idx int) { + // amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.` + // amd64/v3:`MOVBEQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW` + // ppc64le:`MOVDBR` + // s390x:`MOVD\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s` + binary.BigEndian.PutUint64(b[idx:], x) +} + +func store_be32(b []byte, x uint32) { + // amd64/v1,amd64/v2:`BSWAPL`,-`SHR.` + // amd64/v3:`MOVBEL` + // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W` + // ppc64le:`MOVWBR` + // s390x:`MOVW\s.*\(.*\)$`,-`SRW\s`,-`SRD\s` + binary.BigEndian.PutUint32(b, x) +} + +func store_be64_load(b, x *[8]byte) { + // arm64:-`REV` + // amd64:-`BSWAPQ` + binary.BigEndian.PutUint64(b[:], binary.BigEndian.Uint64(x[:])) +} + +func store_be32_load(b, x *[8]byte) { + // arm64:-`REVW` + // amd64:-`BSWAPL` + binary.BigEndian.PutUint32(b[:], binary.BigEndian.Uint32(x[:])) +} + +func store_be32_idx(b []byte, x uint32, idx int) { + // amd64/v1,amd64/v2:`BSWAPL`,-`SHR.` + // amd64/v3:`MOVBEL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W` + // ppc64le:`MOVWBR` + // s390x:`MOVW\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s` + 
binary.BigEndian.PutUint32(b[idx:], x) +} + +func store_be16(b []byte, x uint16) { + // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.` + // amd64/v3:`MOVBEW`,-`ROLW` + // arm64:`MOVH`,`REV16W`,-`MOVB` + // ppc64le:`MOVHBR` + // s390x:`MOVH\s.*\(.*\)$`,-`SRW\s`,-`SRD\s` + binary.BigEndian.PutUint16(b, x) +} + +func store_be16_idx(b []byte, x uint16, idx int) { + // amd64/v1,amd64/v2:`ROLW\s\$8`,-`SHR.` + // amd64/v3:`MOVBEW\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB` + // ppc64le:`MOVHBR` + // s390x:`MOVH\s.*\(.*\)\(.*\*1\)$`,-`SRW\s`,-`SRD\s` + binary.BigEndian.PutUint16(b[idx:], x) +} + +func store_le_byte_2(b []byte, val uint16) { + _ = b[2] + // arm64:`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB` + // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + // amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + b[1], b[2] = byte(val), byte(val>>8) +} + +func store_le_byte_2_inv(b []byte, val uint16) { + _ = b[2] + // 386:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + // amd64:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + b[2], b[1] = byte(val>>8), byte(val) +} + +func store_le_byte_4(b []byte, val uint32) { + _ = b[4] + // arm64:`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH` + // 386:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW` + // amd64:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW` + b[1], b[2], b[3], b[4] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24) +} + +func store_le_byte_8(b []byte, val uint64) { + _ = b[8] + // arm64:`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW` + // amd64:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL` + b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val), byte(val>>8), byte(val>>16), byte(val>>24), byte(val>>32), byte(val>>40), byte(val>>48), byte(val>>56) +} + +func store_be_byte_2(b []byte, val uint16) { + _ = b[2] + // arm64:`REV16W`,`MOVH\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB` + // amd64/v1,amd64/v2:`MOVW\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB` + // amd64/v3: `MOVBEW` + b[1], b[2] = 
byte(val>>8), byte(val) +} + +func store_be_byte_4(b []byte, val uint32) { + _ = b[4] + // arm64:`REVW`,`MOVW\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W` + // amd64/v1,amd64/v2:`MOVL\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW` + // amd64/v3:`MOVBEL\s[A-Z]+,\s1\([A-Z]+\)` + b[1], b[2], b[3], b[4] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val) +} + +func store_be_byte_8(b []byte, val uint64) { + _ = b[8] + // arm64:`REV`,`MOVD\sR[0-9]+,\s1\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`MOVW`,-`REV16W`,-`REVW` + // amd64/v1,amd64/v2:`MOVQ\s[A-Z]+,\s1\([A-Z]+\)`,-`MOVB`,-`MOVW`,-`MOVL` + // amd64/v3:`MOVBEQ\s[A-Z]+,\s1\([A-Z]+\)`, -`MOVBEL` + b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8] = byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), byte(val>>24), byte(val>>16), byte(val>>8), byte(val) +} + +func store_le_byte_2_idx(b []byte, idx int, val uint16) { + _, _ = b[idx+0], b[idx+1] + // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` + // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB` + b[idx+1], b[idx+0] = byte(val>>8), byte(val) +} + +func store_le_byte_2_idx_inv(b []byte, idx int, val uint16) { + _, _ = b[idx+0], b[idx+1] + // 386:`MOVW\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+`,-`MOVB` + b[idx+0], b[idx+1] = byte(val), byte(val>>8) +} + +func store_le_byte_4_idx(b []byte, idx int, val uint32) { + _, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3] + // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH` + b[idx+3], b[idx+2], b[idx+1], b[idx+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val) +} + +func store_be_byte_2_idx(b []byte, idx int, val uint16) { + _, _ = b[idx+0], b[idx+1] + // arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` + b[idx+0], b[idx+1] = byte(val>>8), byte(val) +} + +func store_be_byte_4_idx(b []byte, idx int, val uint32) { + _, _, _, _ = b[idx+0], b[idx+1], b[idx+2], b[idx+3] + // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`,-`MOVH`,-`REV16W` + b[idx+0], b[idx+1], b[idx+2], b[idx+3] = 
byte(val>>24), byte(val>>16), byte(val>>8), byte(val) +} + +func store_be_byte_2_idx2(b []byte, idx int, val uint16) { + _, _ = b[(idx<<1)+0], b[(idx<<1)+1] + // arm64:`REV16W`,`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB` + b[(idx<<1)+0], b[(idx<<1)+1] = byte(val>>8), byte(val) +} + +func store_le_byte_2_idx2(b []byte, idx int, val uint16) { + _, _ = b[(idx<<1)+0], b[(idx<<1)+1] + // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB` + b[(idx<<1)+1], b[(idx<<1)+0] = byte(val>>8), byte(val) +} + +func store_be_byte_4_idx4(b []byte, idx int, val uint32) { + _, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] + // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH`,-`REV16W` + b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val) +} + +func store_le_byte_4_idx4_inv(b []byte, idx int, val uint32) { + _, _, _, _ = b[(idx<<2)+0], b[(idx<<2)+1], b[(idx<<2)+2], b[(idx<<2)+3] + // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOVB`,-`MOVH` + b[(idx<<2)+3], b[(idx<<2)+2], b[(idx<<2)+1], b[(idx<<2)+0] = byte(val>>24), byte(val>>16), byte(val>>8), byte(val) +} + +// ------------- // +// Zeroing // +// ------------- // + +// Check that zero stores are combined into larger stores + +func zero_byte_2(b1, b2 []byte) { + // bounds checks to guarantee safety of writes below + _, _ = b1[1], b2[1] + // arm64:"MOVH\tZR",-"MOVB" + // amd64:`MOVW\s[$]0,\s\([A-Z]+\)` + // 386:`MOVW\s[$]0,\s\([A-Z]+\)` + b1[0], b1[1] = 0, 0 + // arm64:"MOVH\tZR",-"MOVB" + // 386:`MOVW\s[$]0,\s\([A-Z]+\)` + // amd64:`MOVW\s[$]0,\s\([A-Z]+\)` + b2[1], b2[0] = 0, 0 +} + +func zero_byte_4(b1, b2 []byte) { + _, _ = b1[3], b2[3] + // arm64:"MOVW\tZR",-"MOVB",-"MOVH" + // amd64:`MOVL\s[$]0,\s\([A-Z]+\)` + // 386:`MOVL\s[$]0,\s\([A-Z]+\)` + b1[0], b1[1], b1[2], b1[3] = 0, 0, 0, 0 + // arm64:"MOVW\tZR",-"MOVB",-"MOVH" + b2[2], b2[3], b2[1], b2[0] = 0, 0, 0, 0 +} + +func zero_byte_8(b []byte) { + 
_ = b[7] + b[0], b[1], b[2], b[3] = 0, 0, 0, 0 + b[4], b[5], b[6], b[7] = 0, 0, 0, 0 // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" +} + +func zero_byte_16(b []byte) { + _ = b[15] + b[0], b[1], b[2], b[3] = 0, 0, 0, 0 + b[4], b[5], b[6], b[7] = 0, 0, 0, 0 + b[8], b[9], b[10], b[11] = 0, 0, 0, 0 + b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW" +} + +func zero_byte_30(a *[30]byte) { + *a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW" +} + +func zero_byte_39(a *[39]byte) { + *a = [39]byte{} // arm64:"MOVD",-"MOVB",-"MOVH",-"MOVW" +} + +func zero_byte_2_idx(b []byte, idx int) { + _, _ = b[idx+0], b[idx+1] + // arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` + b[idx+0], b[idx+1] = 0, 0 +} + +func zero_byte_2_idx2(b []byte, idx int) { + _, _ = b[(idx<<1)+0], b[(idx<<1)+1] + // arm64:`MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB` + b[(idx<<1)+0], b[(idx<<1)+1] = 0, 0 +} + +func zero_uint16_2(h1, h2 []uint16) { + _, _ = h1[1], h2[1] + // arm64:"MOVW\tZR",-"MOVB",-"MOVH" + // amd64:`MOVL\s[$]0,\s\([A-Z]+\)` + // 386:`MOVL\s[$]0,\s\([A-Z]+\)` + h1[0], h1[1] = 0, 0 + // arm64:"MOVW\tZR",-"MOVB",-"MOVH" + // amd64:`MOVL\s[$]0,\s\([A-Z]+\)` + // 386:`MOVL\s[$]0,\s\([A-Z]+\)` + h2[1], h2[0] = 0, 0 +} + +func zero_uint16_4(h1, h2 []uint16) { + _, _ = h1[3], h2[3] + // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" + // amd64:`MOVQ\s[$]0,\s\([A-Z]+\)` + h1[0], h1[1], h1[2], h1[3] = 0, 0, 0, 0 + // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" + h2[2], h2[3], h2[1], h2[0] = 0, 0, 0, 0 +} + +func zero_uint16_8(h []uint16) { + _ = h[7] + h[0], h[1], h[2], h[3] = 0, 0, 0, 0 + h[4], h[5], h[6], h[7] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH" +} + +func zero_uint32_2(w1, w2 []uint32) { + _, _ = w1[1], w2[1] + // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" + // amd64:`MOVQ\s[$]0,\s\([A-Z]+\)` + w1[0], w1[1] = 0, 0 + // arm64:"MOVD\tZR",-"MOVB",-"MOVH",-"MOVW" + // amd64:`MOVQ\s[$]0,\s\([A-Z]+\)` + w2[1], w2[0] = 0, 0 +} + +func zero_uint32_4(w1, w2 []uint32) { + _, 
_ = w1[3], w2[3] + w1[0], w1[1], w1[2], w1[3] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH" + w2[2], w2[3], w2[1], w2[0] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH" +} + +func zero_uint64_2(d1, d2 []uint64) { + _, _ = d1[1], d2[1] + d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH" + d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH" +} diff --git a/test/codegen/memops.go b/test/codegen/memops.go new file mode 100644 index 0000000..7e59d88 --- /dev/null +++ b/test/codegen/memops.go @@ -0,0 +1,367 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +var x [2]bool +var x8 [2]uint8 +var x16 [2]uint16 +var x32 [2]uint32 +var x64 [2]uint64 + +func compMem1() int { + // amd64:`CMPB\tcommand-line-arguments.x\+1\(SB\), [$]0` + if x[1] { + return 1 + } + // amd64:`CMPB\tcommand-line-arguments.x8\+1\(SB\), [$]7` + if x8[1] == 7 { + return 1 + } + // amd64:`CMPW\tcommand-line-arguments.x16\+2\(SB\), [$]7` + if x16[1] == 7 { + return 1 + } + // amd64:`CMPL\tcommand-line-arguments.x32\+4\(SB\), [$]7` + if x32[1] == 7 { + return 1 + } + // amd64:`CMPQ\tcommand-line-arguments.x64\+8\(SB\), [$]7` + if x64[1] == 7 { + return 1 + } + return 0 +} + +type T struct { + x bool + x8 uint8 + x16 uint16 + x32 uint32 + x64 uint64 + a [2]int // force it passed in memory +} + +func compMem2(t T) int { + // amd64:`CMPB\t.*\(SP\), [$]0` + if t.x { + return 1 + } + // amd64:`CMPB\t.*\(SP\), [$]7` + if t.x8 == 7 { + return 1 + } + // amd64:`CMPW\t.*\(SP\), [$]7` + if t.x16 == 7 { + return 1 + } + // amd64:`CMPL\t.*\(SP\), [$]7` + if t.x32 == 7 { + return 1 + } + // amd64:`CMPQ\t.*\(SP\), [$]7` + if t.x64 == 7 { + return 1 + } + return 0 +} + +func compMem3(x, y *int) (int, bool) { + // We can do comparisons of a register with memory even if + // the register is used subsequently. 
+ r := *x + // amd64:`CMPQ\t\(` + // 386:`CMPL\t\(` + return r, r < *y +} + +// The following functions test that indexed load/store operations get generated. + +func idxInt8(x, y []int8, i int) { + var t int8 + // amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + // 386: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + t = x[i+1] + // amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // 386: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + y[i+1] = t + // amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + // 386: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)` + x[i+1] = 77 +} + +func idxInt16(x, y []int16, i int) { + var t int16 + // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` + // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` + t = x[i+1] + // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` + // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` + y[i+1] = t + // amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` + // 386: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` + t = x[16*i+1] + // amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` + // 386: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` + y[16*i+1] = t + // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` + // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)` + x[i+1] = 77 + // amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` + // 386: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)` + x[16*i+1] = 77 +} + +func idxInt32(x, y []int32, i int) { + var t int32 + // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + t = x[i+1] + // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386: `MOVL\t[A-Z]+[0-9]*, 
4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + y[i+1] = t + // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + t = x[2*i+1] + // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + y[2*i+1] = t + // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` + // 386: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` + t = x[16*i+1] + // amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + y[16*i+1] = t + // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+1] = 77 + // amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + x[16*i+1] = 77 +} + +func idxInt64(x, y []int64, i int) { + var t int64 + // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + t = x[i+1] + // amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + y[i+1] = t + // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*` + t = x[16*i+1] + // amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + y[16*i+1] = t + // amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+1] = 77 + // amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + x[16*i+1] = 77 +} + +func idxFloat32(x, y []float32, i int) { + var t float32 + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+` + t = x[i+1] + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)` + y[i+1] = t + // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + // 386/sse2: 
`MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` + t = x[16*i+1] + // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)` + y[16*i+1] = t +} + +func idxFloat64(x, y []float64, i int) { + var t float64 + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+` + t = x[i+1] + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)` + y[i+1] = t + // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` + t = x[16*i+1] + // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)` + y[16*i+1] = t +} + +func idxLoadPlusOp32(x []int32, i int) int32 { + s := x[0] + // 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + // amd64: `ADDL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s += x[i+1] + // 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + // amd64: `SUBL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s -= x[i+2] + // 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + s *= x[i+3] + // 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + // amd64: `ANDL\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s &= x[i+4] + // 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + // amd64: `ORL\t20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s |= x[i+5] + // 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+` + // amd64: `XORL\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + s ^= x[i+6] + return s +} + +func idxLoadPlusOp64(x []int64, i int) int64 { + s := x[0] + // amd64: `ADDQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s += 
x[i+1] + // amd64: `SUBQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s -= x[i+2] + // amd64: `ANDQ\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s &= x[i+3] + // amd64: `ORQ\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s |= x[i+4] + // amd64: `XORQ\t40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + s ^= x[i+5] + return s +} + +func idxStorePlusOp32(x []int32, i int, v int32) { + // 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `ADDL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+1] += v + // 386: `SUBL\t[A-Z]+, 8\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `SUBL\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+2] -= v + // 386: `ANDL\t[A-Z]+, 12\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `ANDL\t[A-Z]+[0-9]*, 12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+3] &= v + // 386: `ORL\t[A-Z]+, 16\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `ORL\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+4] |= v + // 386: `XORL\t[A-Z]+, 20\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `XORL\t[A-Z]+[0-9]*, 20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+5] ^= v + + // 386: `ADDL\t[$]77, 24\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `ADDL\t[$]77, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+6] += 77 + // 386: `ANDL\t[$]77, 28\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `ANDL\t[$]77, 28\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+7] &= 77 + // 386: `ORL\t[$]77, 32\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `ORL\t[$]77, 32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+8] |= 77 + // 386: `XORL\t[$]77, 36\([A-Z]+\)\([A-Z]+\*4\)` + // amd64: `XORL\t[$]77, 36\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + x[i+9] ^= 77 +} + +func idxStorePlusOp64(x []int64, i int, v int64) { + // amd64: `ADDQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+1] += v + // amd64: `SUBQ\t[A-Z]+[0-9]*, 16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+2] -= v + // amd64: `ANDQ\t[A-Z]+[0-9]*, 24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+3] &= v + // amd64: `ORQ\t[A-Z]+[0-9]*, 
32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+4] |= v + // amd64: `XORQ\t[A-Z]+[0-9]*, 40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+5] ^= v + + // amd64: `ADDQ\t[$]77, 48\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+6] += 77 + // amd64: `ANDQ\t[$]77, 56\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+7] &= 77 + // amd64: `ORQ\t[$]77, 64\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+8] |= 77 + // amd64: `XORQ\t[$]77, 72\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + x[i+9] ^= 77 +} + +func idxCompare(i int) int { + // amd64: `MOVBLZX\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + if x8[i+1] < x8[0] { + return 0 + } + // amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` + if x16[i+1] < x16[0] { + return 0 + } + // amd64: `MOVWLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` + if x16[16*i+1] < x16[0] { + return 0 + } + // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + if x32[i+1] < x32[0] { + return 0 + } + // amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` + if x32[16*i+1] < x32[0] { + return 0 + } + // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + if x64[i+1] < x64[0] { + return 0 + } + // amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*` + if x64[16*i+1] < x64[0] { + return 0 + } + // amd64: `MOVBLZX\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*` + if x8[i+2] < 77 { + return 0 + } + // amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*` + if x16[i+2] < 77 { + return 0 + } + // amd64: `MOVWLZX\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*` + if x16[16*i+2] < 77 { + return 0 + } + // amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*` + if x32[i+2] < 77 { + return 0 + } + // amd64: `MOVL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*` + if x32[16*i+2] < 77 { + return 0 + } + // amd64: `MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*` + if x64[i+2] < 77 { + return 0 + } + // amd64: 
`MOVQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*` + if x64[16*i+2] < 77 { + return 0 + } + return 1 +} + +func idxFloatOps(a []float64, b []float32, i int) (float64, float32) { + c := float64(7) + // amd64: `ADDSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c += a[i+1] + // amd64: `SUBSD\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c -= a[i+2] + // amd64: `MULSD\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c *= a[i+3] + // amd64: `DIVSD\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + c /= a[i+4] + + d := float32(8) + // amd64: `ADDSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d += b[i+1] + // amd64: `SUBSS\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d -= b[i+2] + // amd64: `MULSS\t12\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d *= b[i+3] + // amd64: `DIVSS\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + d /= b[i+4] + return c, d +} diff --git a/test/codegen/noextend.go b/test/codegen/noextend.go new file mode 100644 index 0000000..d8e2917 --- /dev/null +++ b/test/codegen/noextend.go @@ -0,0 +1,221 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +var sval64 [8]int64 +var sval32 [8]int32 +var sval16 [8]int16 +var sval8 [8]int8 +var val64 [8]uint64 +var val32 [8]uint32 +var val16 [8]uint16 +var val8 [8]uint8 + +// Avoid zero/sign extensions following a load +// which has extended the value correctly. +// Note: No tests are done for int8 since +// an extra extension is usually needed due to +// no signed byte load. 
+ +func set16(x8 int8, u8 *uint8, y8 int8, z8 uint8) { + // Truncate not needed, load does sign/zero extend + + // ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + val16[0] = uint16(*u8) + + // AND not needed due to size + // ppc64:-"ANDCC" + // ppc64le:-"ANDCC" + sval16[1] = 255 & int16(x8+y8) + + // ppc64:-"ANDCC" + // ppc64le:-"ANDCC" + val16[1] = 255 & uint16(*u8+z8) + +} +func shiftidx(u8 *uint8, x16 *int16, u16 *uint16) { + + // ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + val16[0] = uint16(sval16[*u8>>2]) + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + sval16[1] = int16(val16[*x16>>1]) + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + val16[1] = uint16(sval16[*u16>>2]) + +} + +func setnox(x8 int8, u8 *uint8, y8 *int8, z8 *uint8, x16 *int16, u16 *uint16, x32 *int32, u32 *uint32) { + + // ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + val16[0] = uint16(*u8) + + // AND not needed due to size + // ppc64:-"ANDCC" + // ppc64le:-"ANDCC" + sval16[1] = 255 & int16(x8+*y8) + + // ppc64:-"ANDCC" + // ppc64le:-"ANDCC" + val16[1] = 255 & uint16(*u8+*z8) + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + sval32[1] = int32(*x16) + + //ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + //ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + val32[0] = uint32(*u8) + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + val32[1] = uint32(*u16) + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + sval64[1] = int64(*x16) + + // ppc64:-"MOVW\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVW\tR\\d+,\\sR\\d+" + sval64[2] = int64(*x32) + + //ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + //ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + val64[0] = uint64(*u8) + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + val64[1] = uint64(*u16) + + // ppc64:-"MOVWZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+" + val64[2] = uint64(*u32) +} 
+ +func cmp16(u8 *uint8, x32 *int32, u32 *uint32, x64 *int64, u64 *uint64) bool { + + // ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + if uint16(*u8) == val16[0] { + return true + } + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + if uint16(*u32>>16) == val16[0] { + return true + } + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + if uint16(*u64>>48) == val16[0] { + return true + } + + // Verify the truncates are using the correct sign. + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + if int16(*x32) == sval16[0] { + return true + } + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + if uint16(*u32) == val16[0] { + return true + } + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + if int16(*x64) == sval16[0] { + return true + } + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + if uint16(*u64) == val16[0] { + return true + } + + return false +} + +func cmp32(u8 *uint8, x16 *int16, u16 *uint16, x64 *int64, u64 *uint64) bool { + + // ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + if uint32(*u8) == val32[0] { + return true + } + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + if int32(*x16) == sval32[0] { + return true + } + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + if uint32(*u16) == val32[0] { + return true + } + + // Verify the truncates are using the correct sign. 
+ // ppc64:-"MOVWZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+" + if int32(*x64) == sval32[0] { + return true + } + + // ppc64:-"MOVW\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVW\tR\\d+,\\sR\\d+" + if uint32(*u64) == val32[0] { + return true + } + + return false +} + +func cmp64(u8 *uint8, x16 *int16, u16 *uint16, x32 *int32, u32 *uint32) bool { + + // ppc64:-"MOVBZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+" + if uint64(*u8) == val64[0] { + return true + } + + // ppc64:-"MOVH\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVH\tR\\d+,\\sR\\d+" + if int64(*x16) == sval64[0] { + return true + } + + // ppc64:-"MOVHZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+" + if uint64(*u16) == val64[0] { + return true + } + + // ppc64:-"MOVW\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVW\tR\\d+,\\sR\\d+" + if int64(*x32) == sval64[0] { + return true + } + + // ppc64:-"MOVWZ\tR\\d+,\\sR\\d+" + // ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+" + if uint64(*u32) == val64[0] { + return true + } + return false +} diff --git a/test/codegen/race.go b/test/codegen/race.go new file mode 100644 index 0000000..b977823 --- /dev/null +++ b/test/codegen/race.go @@ -0,0 +1,22 @@ +// asmcheck -race + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// Check that we elide racefuncenter/racefuncexit for +// functions with no calls (but which might panic +// in various ways). See issue 31219. 
+// amd64:-"CALL.*racefuncenter.*" +// arm64:-"CALL.*racefuncenter.*" +// ppc64le:-"CALL.*racefuncenter.*" +func RaceMightPanic(a []int, i, j, k, s int) { + var b [4]int + _ = b[i] // panicIndex + _ = a[i:j] // panicSlice + _ = a[i:j:k] // also panicSlice + _ = i << s // panicShift + _ = i / j // panicDivide +} diff --git a/test/codegen/regabi_regalloc.go b/test/codegen/regabi_regalloc.go new file mode 100644 index 0000000..a7b7bd5 --- /dev/null +++ b/test/codegen/regabi_regalloc.go @@ -0,0 +1,23 @@ +// asmcheck + +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +//go:registerparams +func f1(a, b int) { + // amd64:"MOVQ\tBX, CX", "MOVQ\tAX, BX", "MOVL\t\\$1, AX", -"MOVQ\t.*DX" + g(1, a, b) +} + +//go:registerparams +func f2(a, b int) { + // amd64:"MOVQ\tBX, AX", "MOVQ\t[AB]X, CX", -"MOVQ\t.*, BX" + g(b, b, b) +} + +//go:noinline +//go:registerparams +func g(int, int, int) {} diff --git a/test/codegen/retpoline.go b/test/codegen/retpoline.go new file mode 100644 index 0000000..a04a005 --- /dev/null +++ b/test/codegen/retpoline.go @@ -0,0 +1,42 @@ +// +build amd64 +// asmcheck -gcflags=-spectre=ret + +package codegen + +func CallFunc(f func()) { + // amd64:`CALL\truntime.retpoline` + f() +} + +func CallInterface(x interface{ M() }) { + // amd64:`CALL\truntime.retpoline` + x.M() +} + +// Check to make sure that jump tables are disabled +// when retpoline is on. See issue 57097. 
+func noJumpTables(x int) int { + switch x { + case 0: + return 0 + case 1: + return 1 + case 2: + return 2 + case 3: + return 3 + case 4: + return 4 + case 5: + return 5 + case 6: + return 6 + case 7: + return 7 + case 8: + return 8 + case 9: + return 9 + } + return 10 +} diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go new file mode 100644 index 0000000..b22288f --- /dev/null +++ b/test/codegen/rotate.go @@ -0,0 +1,279 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +import "math/bits" + +// ------------------- // +// const rotates // +// ------------------- // + +func rot64(x uint64) uint64 { + var a uint64 + + // amd64:"ROLQ\t[$]7" + // ppc64:"ROTL\t[$]7" + // ppc64le:"ROTL\t[$]7" + // loong64: "ROTRV\t[$]57" + a += x<<7 | x>>57 + + // amd64:"ROLQ\t[$]8" + // arm64:"ROR\t[$]56" + // s390x:"RISBGZ\t[$]0, [$]63, [$]8, " + // ppc64:"ROTL\t[$]8" + // ppc64le:"ROTL\t[$]8" + // loong64: "ROTRV\t[$]56" + a += x<<8 + x>>56 + + // amd64:"ROLQ\t[$]9" + // arm64:"ROR\t[$]55" + // s390x:"RISBGZ\t[$]0, [$]63, [$]9, " + // ppc64:"ROTL\t[$]9" + // ppc64le:"ROTL\t[$]9" + // loong64: "ROTRV\t[$]55" + a += x<<9 ^ x>>55 + + // amd64:"ROLQ\t[$]10" + // arm64:"ROR\t[$]54" + // s390x:"RISBGZ\t[$]0, [$]63, [$]10, " + // ppc64:"ROTL\t[$]10" + // ppc64le:"ROTL\t[$]10" + // arm64:"ROR\t[$]54" + // s390x:"RISBGZ\t[$]0, [$]63, [$]10, " + // loong64: "ROTRV\t[$]54" + a += bits.RotateLeft64(x, 10) + + return a +} + +func rot32(x uint32) uint32 { + var a uint32 + + // amd64:"ROLL\t[$]7" + // arm:"MOVW\tR\\d+@>25" + // ppc64:"ROTLW\t[$]7" + // ppc64le:"ROTLW\t[$]7" + // loong64: "ROTR\t[$]25" + a += x<<7 | x>>25 + + // amd64:`ROLL\t[$]8` + // arm:"MOVW\tR\\d+@>24" + // arm64:"RORW\t[$]24" + // s390x:"RLL\t[$]8" + // ppc64:"ROTLW\t[$]8" + // ppc64le:"ROTLW\t[$]8" + // loong64: "ROTR\t[$]24" + a += x<<8 + x>>24 + + // 
amd64:"ROLL\t[$]9" + // arm:"MOVW\tR\\d+@>23" + // arm64:"RORW\t[$]23" + // s390x:"RLL\t[$]9" + // ppc64:"ROTLW\t[$]9" + // ppc64le:"ROTLW\t[$]9" + // loong64: "ROTR\t[$]23" + a += x<<9 ^ x>>23 + + // amd64:"ROLL\t[$]10" + // arm:"MOVW\tR\\d+@>22" + // arm64:"RORW\t[$]22" + // s390x:"RLL\t[$]10" + // ppc64:"ROTLW\t[$]10" + // ppc64le:"ROTLW\t[$]10" + // arm64:"RORW\t[$]22" + // s390x:"RLL\t[$]10" + // loong64: "ROTR\t[$]22" + a += bits.RotateLeft32(x, 10) + + return a +} + +func rot16(x uint16) uint16 { + var a uint16 + + // amd64:"ROLW\t[$]7" + a += x<<7 | x>>9 + + // amd64:`ROLW\t[$]8` + a += x<<8 + x>>8 + + // amd64:"ROLW\t[$]9" + a += x<<9 ^ x>>7 + + return a +} + +func rot8(x uint8) uint8 { + var a uint8 + + // amd64:"ROLB\t[$]5" + a += x<<5 | x>>3 + + // amd64:`ROLB\t[$]6` + a += x<<6 + x>>2 + + // amd64:"ROLB\t[$]7" + a += x<<7 ^ x>>1 + + return a +} + +// ----------------------- // +// non-const rotates // +// ----------------------- // + +func rot64nc(x uint64, z uint) uint64 { + var a uint64 + + z &= 63 + + // amd64:"ROLQ",-"AND" + // arm64:"ROR","NEG",-"AND" + // ppc64:"ROTL",-"NEG",-"AND" + // ppc64le:"ROTL",-"NEG",-"AND" + // loong64: "ROTRV", -"AND" + a += x<<z | x>>(64-z) + + // amd64:"RORQ",-"AND" + // arm64:"ROR",-"NEG",-"AND" + // ppc64:"ROTL","NEG",-"AND" + // ppc64le:"ROTL","NEG",-"AND" + // loong64: "ROTRV", -"AND" + a += x>>z | x<<(64-z) + + return a +} + +func rot32nc(x uint32, z uint) uint32 { + var a uint32 + + z &= 31 + + // amd64:"ROLL",-"AND" + // arm64:"ROR","NEG",-"AND" + // ppc64:"ROTLW",-"NEG",-"AND" + // ppc64le:"ROTLW",-"NEG",-"AND" + // loong64: "ROTR", -"AND" + a += x<<z | x>>(32-z) + + // amd64:"RORL",-"AND" + // arm64:"ROR",-"NEG",-"AND" + // ppc64:"ROTLW","NEG",-"AND" + // ppc64le:"ROTLW","NEG",-"AND" + // loong64: "ROTR", -"AND" + a += x>>z | x<<(32-z) + + return a +} + +func rot16nc(x uint16, z uint) uint16 { + var a uint16 + + z &= 15 + + // amd64:"ROLW",-"ANDQ" + a += x<<z | x>>(16-z) + + // amd64:"RORW",-"ANDQ" + a += 
x>>z | x<<(16-z) + + return a +} + +func rot8nc(x uint8, z uint) uint8 { + var a uint8 + + z &= 7 + + // amd64:"ROLB",-"ANDQ" + a += x<<z | x>>(8-z) + + // amd64:"RORB",-"ANDQ" + a += x>>z | x<<(8-z) + + return a +} + +// Issue 18254: rotate after inlining +func f32(x uint32) uint32 { + // amd64:"ROLL\t[$]7" + return rot32nc(x, 7) +} + +func doubleRotate(x uint64) uint64 { + x = (x << 5) | (x >> 59) + // amd64:"ROLQ\t[$]15" + // arm64:"ROR\t[$]49" + x = (x << 10) | (x >> 54) + return x +} + +// --------------------------------------- // +// Combined Rotate + Masking operations // +// --------------------------------------- // + +func checkMaskedRotate32(a []uint32, r int) { + i := 0 + + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000 + i++ + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+" + a[i] = bits.RotateLeft32(a[i]&0xFF, 16) + i++ + // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+" + // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0 + i++ + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+" + a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16) + i++ + + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000 + i++ + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], r) & 0xFF00 + i++ + + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], r) & 0xFFF00FFF + i++ + // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+" + // ppc64: "RLWNM\t[$]4, R[0-9]+, 
[$]20, [$]11, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 4) & 0xFFF00FFF + i++ +} + +// combined arithmetic and rotate on arm64 +func checkArithmeticWithRotate(a *[1000]uint64) { + // arm64: "AND\tR[0-9]+@>51, R[0-9]+, R[0-9]+" + a[2] = a[1] & bits.RotateLeft64(a[0], 13) + // arm64: "ORR\tR[0-9]+@>51, R[0-9]+, R[0-9]+" + a[5] = a[4] | bits.RotateLeft64(a[3], 13) + // arm64: "EOR\tR[0-9]+@>51, R[0-9]+, R[0-9]+" + a[8] = a[7] ^ bits.RotateLeft64(a[6], 13) + // arm64: "MVN\tR[0-9]+@>51, R[0-9]+" + a[10] = ^bits.RotateLeft64(a[9], 13) + // arm64: "BIC\tR[0-9]+@>51, R[0-9]+, R[0-9]+" + a[13] = a[12] &^ bits.RotateLeft64(a[11], 13) + // arm64: "EON\tR[0-9]+@>51, R[0-9]+, R[0-9]+" + a[16] = a[15] ^ ^bits.RotateLeft64(a[14], 13) + // arm64: "ORN\tR[0-9]+@>51, R[0-9]+, R[0-9]+" + a[19] = a[18] | ^bits.RotateLeft64(a[17], 13) + // arm64: "TST\tR[0-9]+@>51, R[0-9]+" + if a[18]&bits.RotateLeft64(a[19], 13) == 0 { + a[20] = 1 + } + +} diff --git a/test/codegen/select.go b/test/codegen/select.go new file mode 100644 index 0000000..82f6d1c --- /dev/null +++ b/test/codegen/select.go @@ -0,0 +1,20 @@ +// asmcheck + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +func f() { + ch1 := make(chan int) + ch2 := make(chan int) + for { + // amd64:-`MOVQ\t[$]0, command-line-arguments..autotmp_3` + select { + case <-ch1: + case <-ch2: + default: + } + } +} diff --git a/test/codegen/shift.go b/test/codegen/shift.go new file mode 100644 index 0000000..4a9f5d4 --- /dev/null +++ b/test/codegen/shift.go @@ -0,0 +1,486 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package codegen + +// ------------------ // +// constant shifts // +// ------------------ // + +func lshConst64x64(v int64) int64 { + // ppc64:"SLD" + // ppc64le:"SLD" + // riscv64:"SLLI",-"AND",-"SLTIU" + return v << uint64(33) +} + +func rshConst64Ux64(v uint64) uint64 { + // ppc64:"SRD" + // ppc64le:"SRD" + // riscv64:"SRLI",-"AND",-"SLTIU" + return v >> uint64(33) +} + +func rshConst64x64(v int64) int64 { + // ppc64:"SRAD" + // ppc64le:"SRAD" + // riscv64:"SRAI",-"OR",-"SLTIU" + return v >> uint64(33) +} + +func lshConst32x64(v int32) int32 { + // ppc64:"SLW" + // ppc64le:"SLW" + // riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW" + return v << uint64(29) +} + +func rshConst32Ux64(v uint32) uint32 { + // ppc64:"SRW" + // ppc64le:"SRW" + // riscv64:"SRLI",-"AND",-"SLTIU", -"MOVW" + return v >> uint64(29) +} + +func rshConst32x64(v int32) int32 { + // ppc64:"SRAW" + // ppc64le:"SRAW" + // riscv64:"SRAI",-"OR",-"SLTIU", -"MOVW" + return v >> uint64(29) +} + +func lshConst64x32(v int64) int64 { + // ppc64:"SLD" + // ppc64le:"SLD" + // riscv64:"SLLI",-"AND",-"SLTIU" + return v << uint32(33) +} + +func rshConst64Ux32(v uint64) uint64 { + // ppc64:"SRD" + // ppc64le:"SRD" + // riscv64:"SRLI",-"AND",-"SLTIU" + return v >> uint32(33) +} + +func rshConst64x32(v int64) int64 { + // ppc64:"SRAD" + // ppc64le:"SRAD" + // riscv64:"SRAI",-"OR",-"SLTIU" + return v >> uint32(33) +} + +// ------------------ // +// masked shifts // +// ------------------ // + +func lshMask64x64(v int64, s uint64) int64 { + // arm64:"LSL",-"AND" + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v << (s & 63) +} + +func rshMask64Ux64(v uint64, s uint64) uint64 { + // arm64:"LSR",-"AND",-"CSEL" + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SRL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> (s & 63) +} + +func rshMask64x64(v int64, s uint64) 
int64 { + // arm64:"ASR",-"AND",-"CSEL" + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SRA",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> (s & 63) +} + +func lshMask32x64(v int32, s uint64) int32 { + // arm64:"LSL",-"AND" + // ppc64:"ISEL",-"ORN" + // ppc64le:"ISEL",-"ORN" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v << (s & 63) +} + +func rshMask32Ux64(v uint32, s uint64) uint32 { + // arm64:"LSR",-"AND" + // ppc64:"ISEL",-"ORN" + // ppc64le:"ISEL",-"ORN" + // riscv64:"SRL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> (s & 63) +} + +func rshMask32x64(v int32, s uint64) int32 { + // arm64:"ASR",-"AND" + // ppc64:"ISEL",-"ORN" + // ppc64le:"ISEL",-"ORN" + // riscv64:"SRA",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> (s & 63) +} + +func lshMask64x32(v int64, s uint32) int64 { + // arm64:"LSL",-"AND" + // ppc64:"ANDCC",-"ORN" + // ppc64le:"ANDCC",-"ORN" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v << (s & 63) +} + +func rshMask64Ux32(v uint64, s uint32) uint64 { + // arm64:"LSR",-"AND",-"CSEL" + // ppc64:"ANDCC",-"ORN" + // ppc64le:"ANDCC",-"ORN" + // riscv64:"SRL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> (s & 63) +} + +func rshMask64x32(v int64, s uint32) int64 { + // arm64:"ASR",-"AND",-"CSEL" + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SRA",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> (s & 63) +} + +func lshMask64x32Ext(v int64, s int32) int64 { + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v << uint(s&63) +} + +func rshMask64Ux32Ext(v uint64, s int32) uint64 { + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SRL",-"AND\t",-"SLTIU" + // 
s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> uint(s&63) +} + +func rshMask64x32Ext(v int64, s int32) int64 { + // ppc64:"ANDCC",-"ORN",-"ISEL" + // ppc64le:"ANDCC",-"ORN",-"ISEL" + // riscv64:"SRA",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v >> uint(s&63) +} + +// --------------- // +// signed shifts // +// --------------- // + +// We do want to generate a test + panicshift for these cases. +func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) { + // amd64:"TESTB" + _ = x << v8 + // amd64:"TESTW" + _ = x << v16 + // amd64:"TESTL" + _ = x << v32 + // amd64:"TESTQ" + _ = x << v64 +} + +// We want to avoid generating a test + panicshift for these cases. +func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) { + // amd64:-"TESTB" + _ = x << (v8 & 7) + // amd64:-"TESTW" + _ = x << (v16 & 15) + // amd64:-"TESTL" + _ = x << (v32 & 31) + // amd64:-"TESTQ" + _ = x << (v64 & 63) +} + +// ------------------ // +// bounded shifts // +// ------------------ // + +func lshGuarded64(v int64, s uint) int64 { + if s < 64 { + // riscv64:"SLL",-"AND",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" + // arm64:"LSL",-"CSEL" + return v << s + } + panic("shift too large") +} + +func rshGuarded64U(v uint64, s uint) uint64 { + if s < 64 { + // riscv64:"SRL",-"AND",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" + // arm64:"LSR",-"CSEL" + return v >> s + } + panic("shift too large") +} + +func rshGuarded64(v int64, s uint) int64 { + if s < 64 { + // riscv64:"SRA",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" + // arm64:"ASR",-"CSEL" + return v >> s + } + panic("shift too large") +} + +func provedUnsignedShiftLeft(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) { + if shift >= 0 && shift < 64 { + // arm64:"LSL",-"CSEL" + r1 = val64 << shift + } + if shift >= 0 && shift < 32 { + // 
arm64:"LSL",-"CSEL" + r2 = val32 << shift + } + if shift >= 0 && shift < 16 { + // arm64:"LSL",-"CSEL" + r3 = val16 << shift + } + if shift >= 0 && shift < 8 { + // arm64:"LSL",-"CSEL" + r4 = val8 << shift + } + return r1, r2, r3, r4 +} + +func provedSignedShiftLeft(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) { + if shift >= 0 && shift < 64 { + // arm64:"LSL",-"CSEL" + r1 = val64 << shift + } + if shift >= 0 && shift < 32 { + // arm64:"LSL",-"CSEL" + r2 = val32 << shift + } + if shift >= 0 && shift < 16 { + // arm64:"LSL",-"CSEL" + r3 = val16 << shift + } + if shift >= 0 && shift < 8 { + // arm64:"LSL",-"CSEL" + r4 = val8 << shift + } + return r1, r2, r3, r4 +} + +func provedUnsignedShiftRight(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) { + if shift >= 0 && shift < 64 { + // arm64:"LSR",-"CSEL" + r1 = val64 >> shift + } + if shift >= 0 && shift < 32 { + // arm64:"LSR",-"CSEL" + r2 = val32 >> shift + } + if shift >= 0 && shift < 16 { + // arm64:"LSR",-"CSEL" + r3 = val16 >> shift + } + if shift >= 0 && shift < 8 { + // arm64:"LSR",-"CSEL" + r4 = val8 >> shift + } + return r1, r2, r3, r4 +} + +func provedSignedShiftRight(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) { + if shift >= 0 && shift < 64 { + // arm64:"ASR",-"CSEL" + r1 = val64 >> shift + } + if shift >= 0 && shift < 32 { + // arm64:"ASR",-"CSEL" + r2 = val32 >> shift + } + if shift >= 0 && shift < 16 { + // arm64:"ASR",-"CSEL" + r3 = val16 >> shift + } + if shift >= 0 && shift < 8 { + // arm64:"ASR",-"CSEL" + r4 = val8 >> shift + } + return r1, r2, r3, r4 +} + +func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) { + + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f := tab[byte(v)^b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + 
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)&b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[byte(v)|b] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)&h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)^h] + // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + f += tab[uint16(v)|h] + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + f += tab[v&0xff] + // ppc64le:-".*AND",".*CLRLSLWI" + // ppc64:-".*AND",".*CLRLSLWI" + f += 2 * uint32(uint16(d)) + // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" + // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" + g := 2 * uint64(uint32(d)) + return f, g +} + +func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) { + + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + f := (v8 & 0xF) << 2 + // ppc64le:"CLRLSLWI" + // ppc64:"CLRLSLWI" + f += byte(v16) << 3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + g := (v16 & 0xFF) << 3 + // ppc64le:-"AND","CLRLSLWI" + // ppc64:-"AND","CLRLSLWI" + h := (v32 & 0xFFFFF) << 2 + // ppc64le:"CLRLSLDI" + // ppc64:"CLRLSLDI" + i := (v64 & 0xFFFFFFFF) << 5 + // ppc64le:-"CLRLSLDI" + // ppc64:-"CLRLSLDI" + i += (v64 & 0xFFFFFFF) << 38 + // ppc64le/power9:-"CLRLSLDI" + // ppc64/power9:-"CLRLSLDI" + i += (v64 & 0xFFFF00) << 10 + // ppc64le/power9:-"SLD","EXTSWSLI" + // ppc64/power9:-"SLD","EXTSWSLI" + j := int64(x32+32) * 8 + return f, g, h, i, j +} + +func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { + + // ppc64le:-".*MOVW" + f := int32(v >> 32) + // ppc64le:".*MOVW" + f += int32(v >> 31) + // ppc64le:-".*MOVH" + g := int16(v >> 48) + // ppc64le:".*MOVH" + g += int16(v >> 30) + // ppc64le:-".*MOVH" + g += 
int16(f >> 16) + // ppc64le:-".*MOVB" + h := int8(v >> 56) + // ppc64le:".*MOVB" + h += int8(v >> 28) + // ppc64le:-".*MOVB" + h += int8(f >> 24) + // ppc64le:".*MOVB" + h += int8(f >> 16) + return int64(h), uint64(g) +} + +func checkShiftAndMask32(v []uint32) { + i := 0 + + // ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+" + // ppc64: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+" + v[i] = (v[i] & 0xFF00000) >> 8 + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+" + v[i] = (v[i] & 0xFF00) >> 6 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] & 0xFF) >> 8 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] & 0xF000000) >> 28 + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+" + v[i] = (v[i] >> 6) & 0xFF + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+" + v[i] = (v[i] >> 6) & 0xFF000 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] >> 20) & 0xFF000 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] >> 24) & 0xFF00 + i++ +} + +func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) { + // ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+" + // ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+" + a[0] = a[uint8(v>>24)] + // ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+" + // ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+" + b[0] = b[uint8(v>>24)] + // ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+" + // ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+" + b[1] = b[(v>>20)&0xFF] + // ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+" + // ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+" + b[2] = b[v>>25] +} + +// 
128 bit shifts + +func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) { + s := bits & 63 + ŝ := (64 - bits) & 63 + // check that the shift operation has two commas (three operands) + // amd64:"SHRQ.*,.*," + shr := x>>s | y<<ŝ + // amd64:"SHLQ.*,.*," + shl := x<<s | y>>ŝ + return shr, shl +} + +func checkShiftToMask(u []uint64, s []int64) { + // amd64:-"SHR",-"SHL","ANDQ" + u[0] = u[0] >> 5 << 5 + // amd64:-"SAR",-"SHL","ANDQ" + s[0] = s[0] >> 5 << 5 + // amd64:-"SHR",-"SHL","ANDQ" + u[1] = u[1] << 5 >> 5 +} diff --git a/test/codegen/shortcircuit.go b/test/codegen/shortcircuit.go new file mode 100644 index 0000000..e971dca --- /dev/null +++ b/test/codegen/shortcircuit.go @@ -0,0 +1,17 @@ +// asmcheck + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +func efaceExtract(e interface{}) int { + // This should be compiled with only + // a single conditional jump. + // amd64:-"JMP" + if x, ok := e.(int); ok { + return x + } + return 0 +} diff --git a/test/codegen/slices.go b/test/codegen/slices.go new file mode 100644 index 0000000..fa4142d --- /dev/null +++ b/test/codegen/slices.go @@ -0,0 +1,433 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +import "unsafe" + +// This file contains code generation tests related to the handling of +// slice types. 
+ +// ------------------ // +// Clear // +// ------------------ // + +// Issue #5373 optimize memset idiom + +func SliceClear(s []int) []int { + // amd64:`.*memclrNoHeapPointers` + // ppc64le:`.*memclrNoHeapPointers` + // ppc64:`.*memclrNoHeapPointers` + for i := range s { + s[i] = 0 + } + return s +} + +func SliceClearPointers(s []*int) []*int { + // amd64:`.*memclrHasPointers` + // ppc64le:`.*memclrHasPointers` + // ppc64:`.*memclrHasPointers` + for i := range s { + s[i] = nil + } + return s +} + +// ------------------ // +// Extension // +// ------------------ // + +// Issue #21266 - avoid makeslice in append(x, make([]T, y)...) + +func SliceExtensionConst(s []int) []int { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:-`.*runtime\.panicmakeslicelen` + // ppc64le:`.*runtime\.memclrNoHeapPointers` + // ppc64le:-`.*runtime\.makeslice` + // ppc64le:-`.*runtime\.panicmakeslicelen` + // ppc64:`.*runtime\.memclrNoHeapPointers` + // ppc64:-`.*runtime\.makeslice` + // ppc64:-`.*runtime\.panicmakeslicelen` + return append(s, make([]int, 1<<2)...) +} + +func SliceExtensionConstInt64(s []int) []int { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:-`.*runtime\.panicmakeslicelen` + // ppc64le:`.*runtime\.memclrNoHeapPointers` + // ppc64le:-`.*runtime\.makeslice` + // ppc64le:-`.*runtime\.panicmakeslicelen` + // ppc64:`.*runtime\.memclrNoHeapPointers` + // ppc64:-`.*runtime\.makeslice` + // ppc64:-`.*runtime\.panicmakeslicelen` + return append(s, make([]int, int64(1<<2))...) 
+} + +func SliceExtensionConstUint64(s []int) []int { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:-`.*runtime\.panicmakeslicelen` + // ppc64le:`.*runtime\.memclrNoHeapPointers` + // ppc64le:-`.*runtime\.makeslice` + // ppc64le:-`.*runtime\.panicmakeslicelen` + // ppc64:`.*runtime\.memclrNoHeapPointers` + // ppc64:-`.*runtime\.makeslice` + // ppc64:-`.*runtime\.panicmakeslicelen` + return append(s, make([]int, uint64(1<<2))...) +} + +func SliceExtensionConstUint(s []int) []int { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:-`.*runtime\.panicmakeslicelen` + // ppc64le:`.*runtime\.memclrNoHeapPointers` + // ppc64le:-`.*runtime\.makeslice` + // ppc64le:-`.*runtime\.panicmakeslicelen` + // ppc64:`.*runtime\.memclrNoHeapPointers` + // ppc64:-`.*runtime\.makeslice` + // ppc64:-`.*runtime\.panicmakeslicelen` + return append(s, make([]int, uint(1<<2))...) +} + +func SliceExtensionPointer(s []*int, l int) []*int { + // amd64:`.*runtime\.memclrHasPointers` + // amd64:-`.*runtime\.makeslice` + // ppc64le:`.*runtime\.memclrHasPointers` + // ppc64le:-`.*runtime\.makeslice` + // ppc64:`.*runtime\.memclrHasPointers` + // ppc64:-`.*runtime\.makeslice` + return append(s, make([]*int, l)...) +} + +func SliceExtensionVar(s []byte, l int) []byte { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // ppc64le:`.*runtime\.memclrNoHeapPointers` + // ppc64le:-`.*runtime\.makeslice` + // ppc64:`.*runtime\.memclrNoHeapPointers` + // ppc64:-`.*runtime\.makeslice` + return append(s, make([]byte, l)...) +} + +func SliceExtensionVarInt64(s []byte, l int64) []byte { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:`.*runtime\.panicmakeslicelen` + return append(s, make([]byte, l)...) 
+} + +func SliceExtensionVarUint64(s []byte, l uint64) []byte { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:`.*runtime\.panicmakeslicelen` + return append(s, make([]byte, l)...) +} + +func SliceExtensionVarUint(s []byte, l uint) []byte { + // amd64:`.*runtime\.memclrNoHeapPointers` + // amd64:-`.*runtime\.makeslice` + // amd64:`.*runtime\.panicmakeslicelen` + return append(s, make([]byte, l)...) +} + +func SliceExtensionInt64(s []int, l64 int64) []int { + // 386:`.*runtime\.makeslice` + // 386:-`.*runtime\.memclr` + return append(s, make([]int, l64)...) +} + +// ------------------ // +// Make+Copy // +// ------------------ // + +// Issue #26252 - avoid memclr for make+copy + +func SliceMakeCopyLen(s []int) []int { + // amd64:`.*runtime\.mallocgc` + // amd64:`.*runtime\.memmove` + // amd64:-`.*runtime\.makeslice` + // ppc64le:`.*runtime\.mallocgc` + // ppc64le:`.*runtime\.memmove` + // ppc64le:-`.*runtime\.makeslice` + // ppc64:`.*runtime\.mallocgc` + // ppc64:`.*runtime\.memmove` + // ppc64:-`.*runtime\.makeslice` + a := make([]int, len(s)) + copy(a, s) + return a +} + +func SliceMakeCopyLenPtr(s []*int) []*int { + // amd64:`.*runtime\.makeslicecopy` + // amd64:-`.*runtime\.makeslice\(` + // amd64:-`.*runtime\.typedslicecopy` + // ppc64le:`.*runtime\.makeslicecopy` + // ppc64le:-`.*runtime\.makeslice\(` + // ppc64le:-`.*runtime\.typedslicecopy` + // ppc64:`.*runtime\.makeslicecopy` + // ppc64:-`.*runtime\.makeslice\(` + // ppc64:-`.*runtime\.typedslicecopy` + a := make([]*int, len(s)) + copy(a, s) + return a +} + +func SliceMakeCopyConst(s []int) []int { + // amd64:`.*runtime\.makeslicecopy` + // amd64:-`.*runtime\.makeslice\(` + // amd64:-`.*runtime\.memmove` + a := make([]int, 4) + copy(a, s) + return a +} + +func SliceMakeCopyConstPtr(s []*int) []*int { + // amd64:`.*runtime\.makeslicecopy` + // amd64:-`.*runtime\.makeslice\(` + // amd64:-`.*runtime\.typedslicecopy` + a := make([]*int, 4) + copy(a, s) + return a +} + 
+func SliceMakeCopyNoOptNoDeref(s []*int) []*int { + a := new([]*int) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + *a = make([]*int, 4) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.typedslicecopy` + copy(*a, s) + return *a +} + +func SliceMakeCopyNoOptNoVar(s []*int) []*int { + a := make([][]*int, 1) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a[0] = make([]*int, 4) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.typedslicecopy` + copy(a[0], s) + return a[0] +} + +func SliceMakeCopyNoOptBlank(s []*int) []*int { + var a []*int + // amd64:-`.*runtime\.makeslicecopy` + _ = make([]*int, 4) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.typedslicecopy` + copy(a, s) + return a +} + +func SliceMakeCopyNoOptNoMake(s []*int) []*int { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:-`.*runtime\.objectnew` + a := *new([]*int) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.typedslicecopy` + copy(a, s) + return a +} + +func SliceMakeCopyNoOptNoHeapAlloc(s []*int) int { + // amd64:-`.*runtime\.makeslicecopy` + a := make([]*int, 4) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.typedslicecopy` + copy(a, s) + return cap(a) +} + +func SliceMakeCopyNoOptNoCap(s []*int) []*int { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]*int, 0, 4) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.typedslicecopy` + copy(a, s) + return a +} + +func SliceMakeCopyNoOptNoCopy(s []*int) []*int { + copy := func(x, y []*int) {} + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]*int, 4) + // amd64:-`.*runtime\.makeslicecopy` + copy(a, s) + return a +} + +func SliceMakeCopyNoOptWrongOrder(s []*int) []*int { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]*int, 4) + // amd64:`.*runtime\.typedslicecopy` + // 
amd64:-`.*runtime\.makeslicecopy` + copy(s, a) + return a +} + +func SliceMakeCopyNoOptWrongAssign(s []*int) []*int { + var a []*int + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + s = make([]*int, 4) + // amd64:`.*runtime\.typedslicecopy` + // amd64:-`.*runtime\.makeslicecopy` + copy(a, s) + return s +} + +func SliceMakeCopyNoOptCopyLength(s []*int) (int, []*int) { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]*int, 4) + // amd64:`.*runtime\.typedslicecopy` + // amd64:-`.*runtime\.makeslicecopy` + n := copy(a, s) + return n, a +} + +func SliceMakeCopyNoOptSelfCopy(s []*int) []*int { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]*int, 4) + // amd64:`.*runtime\.typedslicecopy` + // amd64:-`.*runtime\.makeslicecopy` + copy(a, a) + return a +} + +func SliceMakeCopyNoOptTargetReference(s []*int) []*int { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]*int, 4) + // amd64:`.*runtime\.typedslicecopy` + // amd64:-`.*runtime\.makeslicecopy` + copy(a, s[:len(a)]) + return a +} + +func SliceMakeCopyNoOptCap(s []int) []int { + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.makeslice\(` + a := make([]int, len(s), 9) + // amd64:-`.*runtime\.makeslicecopy` + // amd64:`.*runtime\.memmove` + copy(a, s) + return a +} + +func SliceMakeCopyNoMemmoveDifferentLen(s []int) []int { + // amd64:`.*runtime\.makeslicecopy` + // amd64:-`.*runtime\.memmove` + a := make([]int, len(s)-1) + // amd64:-`.*runtime\.memmove` + copy(a, s) + return a +} + +// ---------------------- // +// Nil check of &s[0] // +// ---------------------- // +// See issue 30366 +func SliceNilCheck(s []int) { + p := &s[0] + // amd64:-`TESTB` + _ = *p +} + +// ---------------------- // +// Init slice literal // +// ---------------------- // +// See issue 21561 +func InitSmallSliceLiteral() []int { + // amd64:`MOVQ\t[$]42` + return []int{42} +} + +func 
InitNotSmallSliceLiteral() []int { + // amd64:`LEAQ\t.*stmp_` + return []int{ + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + 42, + } +} + +// --------------------------------------- // +// Test PPC64 SUBFCconst folding rules // +// triggered by slice operations. // +// --------------------------------------- // + +func SliceWithConstCompare(a []int, b int) []int { + var c []int = []int{1, 2, 3, 4, 5} + if b+len(a) < len(c) { + // ppc64le:-"NEG" + // ppc64:-"NEG" + return c[b:] + } + return a +} + +func SliceWithSubtractBound(a []int, b int) []int { + // ppc64le:"SUBC",-"NEG" + // ppc64:"SUBC",-"NEG" + return a[(3 - b):] +} + +// --------------------------------------- // +// Code generation for unsafe.Slice // +// --------------------------------------- // + +func Slice1(p *byte, i int) []byte { + // amd64:-"MULQ" + return unsafe.Slice(p, i) +} +func Slice0(p *struct{}, i int) []struct{} { + // amd64:-"MULQ" + return unsafe.Slice(p, i) +} diff --git a/test/codegen/smallintiface.go b/test/codegen/smallintiface.go new file mode 100644 index 0000000..0207a0a --- /dev/null +++ b/test/codegen/smallintiface.go @@ -0,0 +1,22 @@ +// asmcheck + +package codegen + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +func booliface() interface{} { + // amd64:`LEAQ\truntime.staticuint64s\+8\(SB\)` + return true +} + +func smallint8iface() interface{} { + // amd64:`LEAQ\truntime.staticuint64s\+2024\(SB\)` + return int8(-3) +} + +func smalluint8iface() interface{} { + // amd64:`LEAQ\truntime.staticuint64s\+24\(SB\)` + return uint8(3) +} diff --git a/test/codegen/spectre.go b/test/codegen/spectre.go new file mode 100644 index 0000000..d845da3 --- /dev/null +++ b/test/codegen/spectre.go @@ -0,0 +1,38 @@ +// +build amd64 +// asmcheck -gcflags=-spectre=index + +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +func IndexArray(x *[10]int, i int) int { + // amd64:`CMOVQCC` + return x[i] +} + +func IndexString(x string, i int) byte { + // amd64:`CMOVQLS` + return x[i] +} + +func IndexSlice(x []float64, i int) float64 { + // amd64:`CMOVQLS` + return x[i] +} + +func SliceArray(x *[10]int, i, j int) []int { + // amd64:`CMOVQHI` + return x[i:j] +} + +func SliceString(x string, i, j int) string { + // amd64:`CMOVQHI` + return x[i:j] +} + +func SliceSlice(x []float64, i, j int) []float64 { + // amd64:`CMOVQHI` + return x[i:j] +} diff --git a/test/codegen/stack.go b/test/codegen/stack.go new file mode 100644 index 0000000..f28b4a3 --- /dev/null +++ b/test/codegen/stack.go @@ -0,0 +1,120 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +import "runtime" + +// This file contains code generation tests related to the use of the +// stack. + +// Check that stack stores are optimized away. 
+ +// 386:"TEXT\t.*, [$]0-" +// amd64:"TEXT\t.*, [$]0-" +// arm:"TEXT\t.*, [$]-4-" +// arm64:"TEXT\t.*, [$]0-" +// mips:"TEXT\t.*, [$]-4-" +// ppc64:"TEXT\t.*, [$]0-" +// ppc64le:"TEXT\t.*, [$]0-" +// s390x:"TEXT\t.*, [$]0-" +func StackStore() int { + var x int + return *(&x) +} + +type T struct { + A, B, C, D int // keep exported fields + x, y, z int // reset unexported fields +} + +// Check that large structs are cleared directly (issue #24416). + +// 386:"TEXT\t.*, [$]0-" +// amd64:"TEXT\t.*, [$]0-" +// arm:"TEXT\t.*, [$]0-" (spills return address) +// arm64:"TEXT\t.*, [$]0-" +// mips:"TEXT\t.*, [$]-4-" +// ppc64:"TEXT\t.*, [$]0-" +// ppc64le:"TEXT\t.*, [$]0-" +// s390x:"TEXT\t.*, [$]0-" +func ZeroLargeStruct(x *T) { + t := T{} + *x = t +} + +// Check that structs are partially initialised directly (issue #24386). + +// Notes: +// - 386 fails due to spilling a register +// amd64:"TEXT\t.*, [$]0-" +// arm:"TEXT\t.*, [$]0-" (spills return address) +// arm64:"TEXT\t.*, [$]0-" +// ppc64:"TEXT\t.*, [$]0-" +// ppc64le:"TEXT\t.*, [$]0-" +// s390x:"TEXT\t.*, [$]0-" +// Note: that 386 currently has to spill a register. +func KeepWanted(t *T) { + *t = T{A: t.A, B: t.B, C: t.C, D: t.D} +} + +// Check that small array operations avoid using the stack (issue #15925). + +// Notes: +// - 386 fails due to spilling a register +// - arm & mips fail due to softfloat calls +// amd64:"TEXT\t.*, [$]0-" +// arm64:"TEXT\t.*, [$]0-" +// ppc64:"TEXT\t.*, [$]0-" +// ppc64le:"TEXT\t.*, [$]0-" +// s390x:"TEXT\t.*, [$]0-" +func ArrayAdd64(a, b [4]float64) [4]float64 { + return [4]float64{a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]} +} + +// Check that small array initialization avoids using the stack. 
+ +// 386:"TEXT\t.*, [$]0-" +// amd64:"TEXT\t.*, [$]0-" +// arm:"TEXT\t.*, [$]0-" (spills return address) +// arm64:"TEXT\t.*, [$]0-" +// mips:"TEXT\t.*, [$]-4-" +// ppc64:"TEXT\t.*, [$]0-" +// ppc64le:"TEXT\t.*, [$]0-" +// s390x:"TEXT\t.*, [$]0-" +func ArrayInit(i, j int) [4]int { + return [4]int{i, 0, j, 0} +} + +// Check that assembly output has matching offset and base register +// (issue #21064). + +func check_asmout(b [2]int) int { + runtime.GC() // use some frame + // amd64:`.*b\+24\(SP\)` + // arm:`.*b\+4\(FP\)` + return b[1] +} + +// Check that simple functions get promoted to nosplit, even when +// they might panic in various ways. See issue 31219. +// amd64:"TEXT\t.*NOSPLIT.*" +func MightPanic(a []int, i, j, k, s int) { + _ = a[i] // panicIndex + _ = a[i:j] // panicSlice + _ = a[i:j:k] // also panicSlice + _ = i << s // panicShift + _ = i / j // panicDivide +} + +// Put a defer in a loop, so second defer is not open-coded +func Defer() { + for i := 0; i < 2; i++ { + defer func() {}() + } + // amd64:`CALL\truntime\.deferprocStack` + defer func() {}() +} diff --git a/test/codegen/strings.go b/test/codegen/strings.go new file mode 100644 index 0000000..a2c2fc0 --- /dev/null +++ b/test/codegen/strings.go @@ -0,0 +1,65 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// This file contains code generation tests related to the handling of +// string types. + +func CountRunes(s string) int { // Issue #24923 + // amd64:`.*countrunes` + return len([]rune(s)) +} + +func ToByteSlice() []byte { // Issue #24698 + // amd64:`LEAQ\ttype:\[3\]uint8` + // amd64:`CALL\truntime\.newobject` + // amd64:-`.*runtime.stringtoslicebyte` + return []byte("foo") +} + +// Loading from read-only symbols should get transformed into constants. 
+func ConstantLoad() { + // 12592 = 0x3130 + // 50 = 0x32 + // amd64:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(` + // 386:`MOVW\t\$12592, \(`,`MOVB\t\$50, 2\(` + // arm:`MOVW\t\$48`,`MOVW\t\$49`,`MOVW\t\$50` + // arm64:`MOVD\t\$12592`,`MOVD\t\$50` + // wasm:`I64Const\t\$12592`,`I64Store16\t\$0`,`I64Const\t\$50`,`I64Store8\t\$2` + // mips64:`MOVV\t\$48`,`MOVV\t\$49`,`MOVV\t\$50` + bsink = []byte("012") + + // 858927408 = 0x33323130 + // 13620 = 0x3534 + // amd64:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(` + // 386:`MOVL\t\$858927408`,`MOVW\t\$13620, 4\(` + // arm64:`MOVD\t\$858927408`,`MOVD\t\$13620` + // wasm:`I64Const\t\$858927408`,`I64Store32\t\$0`,`I64Const\t\$13620`,`I64Store16\t\$4` + bsink = []byte("012345") + + // 3978425819141910832 = 0x3736353433323130 + // 7306073769690871863 = 0x6564636261393837 + // amd64:`MOVQ\t\$3978425819141910832`,`MOVQ\t\$7306073769690871863` + // 386:`MOVL\t\$858927408, \(`,`DUFFCOPY` + // arm64:`MOVD\t\$3978425819141910832`,`MOVD\t\$7306073769690871863`,`MOVD\t\$15` + // wasm:`I64Const\t\$3978425819141910832`,`I64Store\t\$0`,`I64Const\t\$7306073769690871863`,`I64Store\t\$7` + bsink = []byte("0123456789abcde") + + // 56 = 0x38 + // amd64:`MOVQ\t\$3978425819141910832`,`MOVB\t\$56` + bsink = []byte("012345678") + + // 14648 = 0x3938 + // amd64:`MOVQ\t\$3978425819141910832`,`MOVW\t\$14648` + bsink = []byte("0123456789") + + // 1650538808 = 0x62613938 + // amd64:`MOVQ\t\$3978425819141910832`,`MOVL\t\$1650538808` + bsink = []byte("0123456789ab") +} + +var bsink []byte diff --git a/test/codegen/structs.go b/test/codegen/structs.go new file mode 100644 index 0000000..c4bcb55 --- /dev/null +++ b/test/codegen/structs.go @@ -0,0 +1,46 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package codegen + +// This file contains code generation tests related to the handling of +// struct types. 
+ +// ------------- // +// Zeroing // +// ------------- // + +type Z1 struct { + a, b, c int +} + +func Zero1(t *Z1) { // Issue #18370 + // amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)` + *t = Z1{} +} + +type Z2 struct { + a, b, c *int +} + +func Zero2(t *Z2) { + // amd64:`MOVUPS\tX[0-9]+, \(.*\)`,`MOVQ\t\$0, 16\(.*\)` + // amd64:`.*runtime[.]gcWriteBarrier.*\(SB\)` + *t = Z2{} +} + +// ------------------ // +// Initializing // +// ------------------ // + +type I1 struct { + a, b, c, d int +} + +func Init1(p *I1) { // Issue #18872 + // amd64:`MOVQ\t[$]1`,`MOVQ\t[$]2`,`MOVQ\t[$]3`,`MOVQ\t[$]4` + *p = I1{1, 2, 3, 4} +} diff --git a/test/codegen/switch.go b/test/codegen/switch.go new file mode 100644 index 0000000..603e0be --- /dev/null +++ b/test/codegen/switch.go @@ -0,0 +1,101 @@ +// asmcheck + +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// These tests check code generation of switch statements. 
+ +package codegen + +// see issue 33934 +func f(x string) int { + // amd64:-`cmpstring` + switch x { + case "": + return -1 + case "1", "2", "3": + return -2 + default: + return -3 + } +} + +// use jump tables for 8+ int cases +func square(x int) int { + // amd64:`JMP\s\(.*\)\(.*\)$` + // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$` + switch x { + case 1: + return 1 + case 2: + return 4 + case 3: + return 9 + case 4: + return 16 + case 5: + return 25 + case 6: + return 36 + case 7: + return 49 + case 8: + return 64 + default: + return x * x + } +} + +// use jump tables for 8+ string lengths +func length(x string) int { + // amd64:`JMP\s\(.*\)\(.*\)$` + // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$` + switch x { + case "a": + return 1 + case "bb": + return 2 + case "ccc": + return 3 + case "dddd": + return 4 + case "eeeee": + return 5 + case "ffffff": + return 6 + case "ggggggg": + return 7 + case "hhhhhhhh": + return 8 + default: + return len(x) + } +} + +// Use single-byte ordered comparisons for binary searching strings. +// See issue 53333. +func mimetype(ext string) string { + // amd64: `CMPB\s1\(.*\), \$104$`,-`cmpstring` + // arm64: `MOVB\s1\(R.*\), R.*$`, `CMPW\s\$104, R.*$`, -`cmpstring` + switch ext { + // amd64: `CMPL\s\(.*\), \$1836345390$` + // arm64: `MOVD\s\$1836345390`, `CMPW\sR.*, R.*$` + case ".htm": + return "A" + // amd64: `CMPL\s\(.*\), \$1953457454$` + // arm64: `MOVD\s\$1953457454`, `CMPW\sR.*, R.*$` + case ".eot": + return "B" + // amd64: `CMPL\s\(.*\), \$1735815982$` + // arm64: `MOVD\s\$1735815982`, `CMPW\sR.*, R.*$` + case ".svg": + return "C" + // amd64: `CMPL\s\(.*\), \$1718907950$` + // arm64: `MOVD\s\$1718907950`, `CMPW\sR.*, R.*$` + case ".ttf": + return "D" + default: + return "" + } +} diff --git a/test/codegen/zerosize.go b/test/codegen/zerosize.go new file mode 100644 index 0000000..ecf3305 --- /dev/null +++ b/test/codegen/zerosize.go @@ -0,0 +1,25 @@ +// asmcheck + +// Copyright 2018 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Make sure a pointer variable and a zero-sized variable +// aren't allocated to the same stack slot. +// See issue 24993. + +package codegen + +func zeroSize() { + c := make(chan struct{}) + // amd64:`MOVQ\t\$0, command-line-arguments\.s\+56\(SP\)` + var s *int + // force s to be a stack object, also use some (fixed) stack space + g(&s, 1, 2, 3, 4, 5) + + // amd64:`LEAQ\tcommand-line-arguments\..*\+55\(SP\)` + c <- struct{}{} +} + +//go:noinline +func g(**int, int, int, int, int, int) {} |