diff options
Diffstat (limited to 'src/cmd/internal/obj/riscv/obj.go')
-rw-r--r-- | src/cmd/internal/obj/riscv/obj.go | 2034 |
1 files changed, 2034 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go new file mode 100644 index 0000000..9257a64 --- /dev/null +++ b/src/cmd/internal/obj/riscv/obj.go @@ -0,0 +1,2034 @@ +// Copyright © 2015 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package riscv + +import ( + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "fmt" +) + +func buildop(ctxt *obj.Link) {} + +// jalrToSym replaces p with a set of Progs needed to jump to the Sym in p. +// lr is the link register to use for the JALR. +// p must be a CALL, JMP or RET. +func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog { + if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY { + ctxt.Diag("unexpected Prog in jalrToSym: %v", p) + return p + } + + // TODO(jsing): Consider using a single JAL instruction and teaching + // the linker to provide trampolines for the case where the destination + // offset is too large. This would potentially reduce instructions for + // the common case, but would require three instructions to go via the + // trampoline. + + to := p.To + + p.As = AAUIPC + p.Mark |= NEED_PCREL_ITYPE_RELOC + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p = obj.Appendp(p, newprog) + + // Leave Sym only for the CALL reloc in assemble. + p.As = AJALR + p.From.Type = obj.TYPE_REG + p.From.Reg = lr + p.Reg = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_TMP + p.To.Sym = to.Sym + + return p +} + +// progedit is called individually for each *obj.Prog. It normalizes instruction +// formats and eliminates as many pseudo-instructions as possible. +func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + + // Expand binary instructions to ternary ones. + if p.Reg == 0 { + switch p.As { + case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI, + AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA, + AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW, + AREM, AREMU, AREMW, AREMUW: + p.Reg = p.To.Reg + } + } + + // Rewrite instructions with constant operands to refer to the immediate + // form of the instruction. + if p.From.Type == obj.TYPE_CONST { + switch p.As { + case AADD: + p.As = AADDI + case ASLT: + p.As = ASLTI + case ASLTU: + p.As = ASLTIU + case AAND: + p.As = AANDI + case AOR: + p.As = AORI + case AXOR: + p.As = AXORI + case ASLL: + p.As = ASLLI + case ASRL: + p.As = ASRLI + case ASRA: + p.As = ASRAI + } + } + + switch p.As { + case obj.AJMP: + // Turn JMP into JAL ZERO or JALR ZERO. + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_ZERO + + switch p.To.Type { + case obj.TYPE_BRANCH: + p.As = AJAL + case obj.TYPE_MEM: + switch p.To.Name { + case obj.NAME_NONE: + p.As = AJALR + case obj.NAME_EXTERN: + // Handled in preprocess. + default: + ctxt.Diag("unsupported name %d for %v", p.To.Name, p) + } + default: + panic(fmt.Sprintf("unhandled type %+v", p.To.Type)) + } + + case obj.ACALL: + switch p.To.Type { + case obj.TYPE_MEM: + // Handled in preprocess. + case obj.TYPE_REG: + p.As = AJALR + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + default: + ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p) + } + + case obj.AUNDEF: + p.As = AEBREAK + + case ASCALL: + // SCALL is the old name for ECALL. + p.As = AECALL + + case ASBREAK: + // SBREAK is the old name for EBREAK. + p.As = AEBREAK + } +} + +// addrToReg extracts the register from an Addr, handling special Addr.Names. +func addrToReg(a obj.Addr) int16 { + switch a.Name { + case obj.NAME_PARAM, obj.NAME_AUTO: + return REG_SP + } + return a.Reg +} + +// movToLoad converts a MOV mnemonic into the corresponding load instruction. +func movToLoad(mnemonic obj.As) obj.As { + switch mnemonic { + case AMOV: + return ALD + case AMOVB: + return ALB + case AMOVH: + return ALH + case AMOVW: + return ALW + case AMOVBU: + return ALBU + case AMOVHU: + return ALHU + case AMOVWU: + return ALWU + case AMOVF: + return AFLW + case AMOVD: + return AFLD + default: + panic(fmt.Sprintf("%+v is not a MOV", mnemonic)) + } +} + +// movToStore converts a MOV mnemonic into the corresponding store instruction. +func movToStore(mnemonic obj.As) obj.As { + switch mnemonic { + case AMOV: + return ASD + case AMOVB: + return ASB + case AMOVH: + return ASH + case AMOVW: + return ASW + case AMOVF: + return AFSW + case AMOVD: + return AFSD + default: + panic(fmt.Sprintf("%+v is not a MOV", mnemonic)) + } +} + +// rewriteMOV rewrites MOV pseudo-instructions. +func rewriteMOV(ctxt *obj.Link, newprog obj.ProgAlloc, p *obj.Prog) { + switch p.As { + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + default: + panic(fmt.Sprintf("%+v is not a MOV pseudo-instruction", p.As)) + } + + switch p.From.Type { + case obj.TYPE_MEM: // MOV c(Rs), Rd -> L $c, Rs, Rd + switch p.From.Name { + case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE: + if p.To.Type != obj.TYPE_REG { + ctxt.Diag("unsupported load at %v", p) + } + p.As = movToLoad(p.As) + p.From.Reg = addrToReg(p.From) + + case obj.NAME_EXTERN, obj.NAME_STATIC: + // AUIPC $off_hi, R + // L $off_lo, R + as := p.As + to := p.To + + p.As = AAUIPC + p.Mark |= NEED_PCREL_ITYPE_RELOC + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: to.Reg} + p = obj.Appendp(p, newprog) + + p.As = movToLoad(as) + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: to.Reg, Offset: 0} + p.To = to + + default: + ctxt.Diag("unsupported name %d for %v", p.From.Name, p) + } + + case obj.TYPE_REG: + switch p.To.Type { + case obj.TYPE_REG: + switch p.As { + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + default: + ctxt.Diag("unsupported register-register move at %v", p) + } + + case obj.TYPE_MEM: // MOV Rs, c(Rd) -> S $c, Rs, Rd + switch p.As { + case AMOVBU, AMOVHU, AMOVWU: + ctxt.Diag("unsupported unsigned store at %v", p) + } + switch p.To.Name { + case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE: + p.As = movToStore(p.As) + p.To.Reg = addrToReg(p.To) + + case obj.NAME_EXTERN: + // AUIPC $off_hi, TMP + // S $off_lo, TMP, R + as := p.As + from := p.From + + p.As = AAUIPC + p.Mark |= NEED_PCREL_STYPE_RELOC + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym}) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p = obj.Appendp(p, newprog) + + p.As = movToStore(as) + p.From = from + p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_TMP, Offset: 0} + + default: + ctxt.Diag("unsupported name %d for %v", p.From.Name, p) + } + + default: + ctxt.Diag("unsupported MOV at %v", p) + } + + case obj.TYPE_CONST: + // MOV $c, R + // If c is small enough, convert to: + // ADD $c, ZERO, R + // If not, convert to: + // LUI top20bits(c), R + // ADD bottom12bits(c), R, R + if p.As != AMOV { + ctxt.Diag("unsupported constant load at %v", p) + } + off := p.From.Offset + to := p.To + + low, high, err := Split32BitImmediate(off) + if err != nil { + ctxt.Diag("%v: constant %d too large: %v", p, off, err) + } + + // LUI is only necessary if the offset doesn't fit in 12-bits. + needLUI := high != 0 + if needLUI { + p.As = ALUI + p.To = to + // Pass top 20 bits to LUI. + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high} + p = obj.Appendp(p, newprog) + } + p.As = AADDIW + p.To = to + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: low} + p.Reg = REG_ZERO + if needLUI { + p.Reg = to.Reg + } + + case obj.TYPE_ADDR: // MOV $sym+off(SP/SB), R + if p.To.Type != obj.TYPE_REG || p.As != AMOV { + ctxt.Diag("unsupported addr MOV at %v", p) + } + switch p.From.Name { + case obj.NAME_EXTERN, obj.NAME_STATIC: + // AUIPC $off_hi, R + // ADDI $off_lo, R + to := p.To + + p.As = AAUIPC + p.Mark |= NEED_PCREL_ITYPE_RELOC + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: p.From.Offset, Sym: p.From.Sym}) + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} + p.Reg = 0 + p.To = to + p = obj.Appendp(p, newprog) + + p.As = AADDI + p.From = obj.Addr{Type: obj.TYPE_CONST} + p.Reg = to.Reg + p.To = to + + case obj.NAME_PARAM, obj.NAME_AUTO: + p.As = AADDI + p.Reg = REG_SP + p.From.Type = obj.TYPE_CONST + + case obj.NAME_NONE: + p.As = AADDI + p.Reg = p.From.Reg + p.From.Type = obj.TYPE_CONST + p.From.Reg = 0 + + default: + ctxt.Diag("bad addr MOV from name %v at %v", p.From.Name, p) + } + + default: + ctxt.Diag("unsupported MOV at %v", p) + } +} + +// InvertBranch inverts the condition of a conditional branch. +func InvertBranch(as obj.As) obj.As { + switch as { + case ABEQ: + return ABNE + case ABEQZ: + return ABNEZ + case ABGE: + return ABLT + case ABGEU: + return ABLTU + case ABGEZ: + return ABLTZ + case ABGT: + return ABLE + case ABGTU: + return ABLEU + case ABGTZ: + return ABLEZ + case ABLE: + return ABGT + case ABLEU: + return ABGTU + case ABLEZ: + return ABGTZ + case ABLT: + return ABGE + case ABLTU: + return ABGEU + case ABLTZ: + return ABGEZ + case ABNE: + return ABEQ + case ABNEZ: + return ABEQZ + default: + panic("InvertBranch: not a branch") + } +} + +// containsCall reports whether the symbol contains a CALL (or equivalent) +// instruction. Must be called after progedit. +func containsCall(sym *obj.LSym) bool { + // CALLs are CALL or JAL(R) with link register LR. + for p := sym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: + return true + case AJAL, AJALR: + if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR { + return true + } + } + } + + return false +} + +// setPCs sets the Pc field in all instructions reachable from p. +// It uses pc as the initial value. +func setPCs(p *obj.Prog, pc int64) { + for ; p != nil; p = p.Link { + p.Pc = pc + for _, ins := range instructionsForProg(p) { + pc += int64(ins.length()) + } + } +} + +// stackOffset updates Addr offsets based on the current stack size. +// +// The stack looks like: +// ------------------- +// | | +// | PARAMs | +// | | +// | | +// ------------------- +// | Parent RA | SP on function entry +// ------------------- +// | | +// | | +// | AUTOs | +// | | +// | | +// ------------------- +// | RA | SP during function execution +// ------------------- +// +// FixedFrameSize makes other packages aware of the space allocated for RA. +// +// A nicer version of this diagram can be found on slide 21 of the presentation +// attached to: +// +// https://golang.org/issue/16922#issuecomment-243748180 +// +func stackOffset(a *obj.Addr, stacksize int64) { + switch a.Name { + case obj.NAME_AUTO: + // Adjust to the top of AUTOs. + a.Offset += stacksize + case obj.NAME_PARAM: + // Adjust to the bottom of PARAMs. + a.Offset += stacksize + 8 + } +} + +// preprocess generates prologue and epilogue code, computes PC-relative branch +// and jump offsets, and resolves pseudo-registers. +// +// preprocess is called once per linker symbol. +// +// When preprocess finishes, all instructions in the symbol are either +// concrete, real RISC-V instructions or directive pseudo-ops like TEXT, +// PCDATA, and FUNCDATA. +func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { + return + } + + // Generate the prologue. + text := cursym.Func().Text + if text.As != obj.ATEXT { + ctxt.Diag("preprocess: found symbol that does not start with TEXT directive") + return + } + + stacksize := text.To.Offset + if stacksize == -8 { + // Historical way to mark NOFRAME. + text.From.Sym.Set(obj.AttrNoFrame, true) + stacksize = 0 + } + if stacksize < 0 { + ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize) + } + if text.From.Sym.NoFrame() { + if stacksize != 0 { + ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize) + } + } + + if !containsCall(cursym) { + text.From.Sym.Set(obj.AttrLeaf, true) + if stacksize == 0 { + // A leaf function with no locals has no frame. + text.From.Sym.Set(obj.AttrNoFrame, true) + } + } + + // Save LR unless there is no frame. + if !text.From.Sym.NoFrame() { + stacksize += ctxt.FixedFrameSize() + } + + cursym.Func().Args = text.To.Val.(int32) + cursym.Func().Locals = int32(stacksize) + + prologue := text + + if !cursym.Func().Text.From.Sym.NoSplit() { + prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check + } + + if stacksize != 0 { + prologue = ctxt.StartUnsafePoint(prologue, newprog) + + // Actually save LR. + prologue = obj.Appendp(prologue, newprog) + prologue.As = AMOV + prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize} + + // Insert stack adjustment. + prologue = obj.Appendp(prologue, newprog) + prologue.As = AADDI + prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize} + prologue.Reg = REG_SP + prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP} + prologue.Spadj = int32(stacksize) + + prologue = ctxt.EndUnsafePoint(prologue, newprog, -1) + } + + if cursym.Func().Text.From.Sym.Wrapper() { + // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame + // + // MOV g_panic(g), X11 + // BNE X11, ZERO, adjust + // end: + // NOP + // ...rest of function.. + // adjust: + // MOV panic_argp(X11), X12 + // ADD $(autosize+FIXED_FRAME), SP, X13 + // BNE X12, X13, end + // ADD $FIXED_FRAME, SP, X12 + // MOV X12, panic_argp(X11) + // JMP end + // + // The NOP is needed to give the jumps somewhere to land. + + ldpanic := obj.Appendp(prologue, newprog) + + ldpanic.As = AMOV + ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic + ldpanic.Reg = 0 + ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X11} + + bneadj := obj.Appendp(ldpanic, newprog) + bneadj.As = ABNE + bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X11} + bneadj.Reg = REG_ZERO + bneadj.To.Type = obj.TYPE_BRANCH + + endadj := obj.Appendp(bneadj, newprog) + endadj.As = obj.ANOP + + last := endadj + for last.Link != nil { + last = last.Link + } + + getargp := obj.Appendp(last, newprog) + getargp.As = AMOV + getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X11, Offset: 0} // Panic.argp + getargp.Reg = 0 + getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12} + + bneadj.To.SetTarget(getargp) + + calcargp := obj.Appendp(getargp, newprog) + calcargp.As = AADDI + calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.FixedFrameSize()} + calcargp.Reg = REG_SP + calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X13} + + testargp := obj.Appendp(calcargp, newprog) + testargp.As = ABNE + testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12} + testargp.Reg = REG_X13 + testargp.To.Type = obj.TYPE_BRANCH + testargp.To.SetTarget(endadj) + + adjargp := obj.Appendp(testargp, newprog) + adjargp.As = AADDI + adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)} + adjargp.Reg = REG_SP + adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12} + + setargp := obj.Appendp(adjargp, newprog) + setargp.As = AMOV + setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12} + setargp.Reg = 0 + setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X11, Offset: 0} // Panic.argp + + godone := obj.Appendp(setargp, newprog) + godone.As = AJAL + godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + godone.To.Type = obj.TYPE_BRANCH + godone.To.SetTarget(endadj) + } + + // Update stack-based offsets. + for p := cursym.Func().Text; p != nil; p = p.Link { + stackOffset(&p.From, stacksize) + stackOffset(&p.To, stacksize) + } + + // Additional instruction rewriting. + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case obj.AGETCALLERPC: + if cursym.Leaf() { + // MOV LR, Rd + p.As = AMOV + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_LR + } else { + // MOV (RSP), Rd + p.As = AMOV + p.From.Type = obj.TYPE_MEM + p.From.Reg = REG_SP + } + + case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: + switch p.To.Type { + case obj.TYPE_MEM: + jalrToSym(ctxt, p, newprog, REG_LR) + } + + case obj.AJMP: + switch p.To.Type { + case obj.TYPE_MEM: + switch p.To.Name { + case obj.NAME_EXTERN: + // JMP to symbol. + jalrToSym(ctxt, p, newprog, REG_ZERO) + } + } + + case obj.ARET: + // Replace RET with epilogue. + retJMP := p.To.Sym + + if stacksize != 0 { + // Restore LR. + p.As = AMOV + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0} + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + p = obj.Appendp(p, newprog) + + p.As = AADDI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize} + p.Reg = REG_SP + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP} + p.Spadj = int32(-stacksize) + p = obj.Appendp(p, newprog) + } + + if retJMP != nil { + p.As = obj.ARET + p.To.Sym = retJMP + p = jalrToSym(ctxt, p, newprog, REG_ZERO) + } else { + p.As = AJALR + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} + } + + // "Add back" the stack removed in the previous instruction. + // + // This is to avoid confusing pctospadj, which sums + // Spadj from function entry to each PC, and shouldn't + // count adjustments from earlier epilogues, since they + // won't affect later PCs. + p.Spadj = int32(stacksize) + + case AADDI: + // Refine Spadjs account for adjustment via ADDI instruction. + if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST { + p.Spadj = int32(-p.From.Offset) + } + } + } + + // Rewrite MOV pseudo-instructions. This cannot be done in + // progedit, as SP offsets need to be applied before we split + // up some of the Addrs. + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + rewriteMOV(ctxt, newprog, p) + } + } + + // Split immediates larger than 12-bits. + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + // <opi> $imm, REG, TO + case AADDI, AANDI, AORI, AXORI: + // LUI $high, TMP + // ADDI $low, TMP, TMP + // <op> TMP, REG, TO + q := *p + low, high, err := Split32BitImmediate(p.From.Offset) + if err != nil { + ctxt.Diag("%v: constant %d too large", p, p.From.Offset, err) + } + if high == 0 { + break // no need to split + } + + p.As = ALUI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p.Spadj = 0 // needed if TO is SP + p = obj.Appendp(p, newprog) + + p.As = AADDIW + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: low} + p.Reg = REG_TMP + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p = obj.Appendp(p, newprog) + + switch q.As { + case AADDI: + p.As = AADD + case AANDI: + p.As = AAND + case AORI: + p.As = AOR + case AXORI: + p.As = AXOR + default: + ctxt.Diag("unsupported instruction %v for splitting", q) + } + p.Spadj = q.Spadj + p.To = q.To + p.Reg = q.Reg + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + // <load> $imm, REG, TO (load $imm+(REG), TO) + case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD: + low, high, err := Split32BitImmediate(p.From.Offset) + if err != nil { + ctxt.Diag("%v: constant %d too large", p, p.From.Offset) + } + if high == 0 { + break // no need to split + } + q := *p + + // LUI $high, TMP + // ADD TMP, REG, TMP + // <load> $low, TMP, TO + p.As = ALUI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p.Spadj = 0 // needed if TO is SP + p = obj.Appendp(p, newprog) + + p.As = AADD + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p.Reg = q.From.Reg + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p = obj.Appendp(p, newprog) + + p.As = q.As + p.To = q.To + p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_TMP, Offset: low} + p.Reg = obj.REG_NONE + + // <store> $imm, REG, TO (store $imm+(TO), REG) + case ASD, ASB, ASH, ASW, AFSW, AFSD: + low, high, err := Split32BitImmediate(p.To.Offset) + if err != nil { + ctxt.Diag("%v: constant %d too large", p, p.To.Offset) + } + if high == 0 { + break // no need to split + } + q := *p + + // LUI $high, TMP + // ADD TMP, TO, TMP + // <store> $low, REG, TMP + p.As = ALUI + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high} + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p.Spadj = 0 // needed if TO is SP + p = obj.Appendp(p, newprog) + + p.As = AADD + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p.Reg = q.To.Reg + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + p = obj.Appendp(p, newprog) + + p.As = q.As + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: q.From.Reg, Offset: 0} + p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_TMP, Offset: low} + } + } + + // Compute instruction addresses. Once we do that, we need to check for + // overextended jumps and branches. Within each iteration, Pc differences + // are always lower bounds (since the program gets monotonically longer, + // a fixed point will be reached). No attempt to handle functions > 2GiB. + for { + rescan := false + setPCs(cursym.Func().Text, 0) + + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: + if p.To.Type != obj.TYPE_BRANCH { + panic("assemble: instruction with branch-like opcode lacks destination") + } + offset := p.To.Target().Pc - p.Pc + if offset < -4096 || 4096 <= offset { + // Branch is long. Replace it with a jump. + jmp := obj.Appendp(p, newprog) + jmp.As = AJAL + jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + jmp.To = obj.Addr{Type: obj.TYPE_BRANCH} + jmp.To.SetTarget(p.To.Target()) + + p.As = InvertBranch(p.As) + p.To.SetTarget(jmp.Link) + + // We may have made previous branches too long, + // so recheck them. + rescan = true + } + case AJAL: + if p.To.Target() == nil { + panic("intersymbol jumps should be expressed as AUIPC+JALR") + } + offset := p.To.Target().Pc - p.Pc + if offset < -(1<<20) || (1<<20) <= offset { + // Replace with 2-instruction sequence. This assumes + // that TMP is not live across J instructions, since + // it is reserved by SSA. + jmp := obj.Appendp(p, newprog) + jmp.As = AJALR + jmp.From = p.From + jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + // p.From is not generally valid, however will be + // fixed up in the next loop. + p.As = AAUIPC + p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym} + p.From.SetTarget(p.To.Target()) + p.Reg = 0 + p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} + + rescan = true + } + } + } + + if !rescan { + break + } + } + + // Now that there are no long branches, resolve branch and jump targets. + // At this point, instruction rewriting which changes the number of + // instructions will break everything--don't do it! + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: + switch p.To.Type { + case obj.TYPE_BRANCH: + p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc + case obj.TYPE_MEM: + panic("unhandled type") + } + + case AAUIPC: + if p.From.Type == obj.TYPE_BRANCH { + low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc) + if err != nil { + ctxt.Diag("%v: jump displacement %d too large", p, p.To.Target().Pc-p.Pc) + } + p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym} + p.Link.From.Offset = low + } + } + } + + // Validate all instructions - this provides nice error messages. + for p := cursym.Func().Text; p != nil; p = p.Link { + for _, ins := range instructionsForProg(p) { + ins.validate(ctxt) + } + } +} + +func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog { + // Leaf function with no frame is effectively NOSPLIT. + if framesize == 0 { + return p + } + + // MOV g_stackguard(g), X10 + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From.Type = obj.TYPE_MEM + p.From.Reg = REGG + p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 + if cursym.CFunc() { + p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 + } + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X10 + + var to_done, to_more *obj.Prog + + if framesize <= objabi.StackSmall { + // small stack: SP < stackguard + // BLTU SP, stackguard, done + p = obj.Appendp(p, newprog) + p.As = ABLTU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X10 + p.Reg = REG_SP + p.To.Type = obj.TYPE_BRANCH + to_done = p + } else if framesize <= objabi.StackBig { + // large stack: SP-framesize < stackguard-StackSmall + // ADD $-(framesize-StackSmall), SP, X11 + // BLTU X11, stackguard, done + p = obj.Appendp(p, newprog) + // TODO(sorear): logic inconsistent with comment, but both match all non-x86 arches + p.As = AADDI + p.From.Type = obj.TYPE_CONST + p.From.Offset = -(int64(framesize) - objabi.StackSmall) + p.Reg = REG_SP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X11 + + p = obj.Appendp(p, newprog) + p.As = ABLTU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X10 + p.Reg = REG_X11 + p.To.Type = obj.TYPE_BRANCH + to_done = p + } else { + // Such a large stack we need to protect against wraparound. + // If SP is close to zero: + // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) + // The +StackGuard on both sides is required to keep the left side positive: + // SP is allowed to be slightly below stackguard. See stack.h. + // + // Preemption sets stackguard to StackPreempt, a very large value. + // That breaks the math above, so we have to check for that explicitly. + // // stackguard is X10 + // MOV $StackPreempt, X11 + // BEQ X10, X11, more + // ADD $StackGuard, SP, X11 + // SUB X10, X11 + // MOV $(framesize+(StackGuard-StackSmall)), X10 + // BGTU X11, X10, done + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From.Type = obj.TYPE_CONST + p.From.Offset = objabi.StackPreempt + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X11 + + p = obj.Appendp(p, newprog) + to_more = p + p.As = ABEQ + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X10 + p.Reg = REG_X11 + p.To.Type = obj.TYPE_BRANCH + + p = obj.Appendp(p, newprog) + p.As = AADDI + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(objabi.StackGuard) + p.Reg = REG_SP + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X11 + + p = obj.Appendp(p, newprog) + p.As = ASUB + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X10 + p.Reg = REG_X11 + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X11 + + p = obj.Appendp(p, newprog) + p.As = AMOV + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall + p.To.Type = obj.TYPE_REG + p.To.Reg = REG_X10 + + p = obj.Appendp(p, newprog) + p.As = ABLTU + p.From.Type = obj.TYPE_REG + p.From.Reg = REG_X10 + p.Reg = REG_X11 + p.To.Type = obj.TYPE_BRANCH + to_done = p + } + + p = ctxt.EmitEntryLiveness(cursym, p, newprog) + + // CALL runtime.morestack(SB) + p = obj.Appendp(p, newprog) + p.As = obj.ACALL + p.To.Type = obj.TYPE_BRANCH + if cursym.CFunc() { + p.To.Sym = ctxt.Lookup("runtime.morestackc") + } else if !cursym.Func().Text.From.Sym.NeedCtxt() { + p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt") + } else { + p.To.Sym = ctxt.Lookup("runtime.morestack") + } + if to_more != nil { + to_more.To.SetTarget(p) + } + p = jalrToSym(ctxt, p, newprog, REG_X5) + + // JMP start + p = obj.Appendp(p, newprog) + p.As = AJAL + p.To = obj.Addr{Type: obj.TYPE_BRANCH} + p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} + p.To.SetTarget(cursym.Func().Text.Link) + + // placeholder for to_done's jump target + p = obj.Appendp(p, newprog) + p.As = obj.ANOP // zero-width place holder + to_done.To.SetTarget(p) + + return p +} + +// signExtend sign extends val starting at bit bit. +func signExtend(val int64, bit uint) int64 { + return val << (64 - bit) >> (64 - bit) +} + +// Split32BitImmediate splits a signed 32-bit immediate into a signed 20-bit +// upper immediate and a signed 12-bit lower immediate to be added to the upper +// result. For example, high may be used in LUI and low in a following ADDI to +// generate a full 32-bit constant. +func Split32BitImmediate(imm int64) (low, high int64, err error) { + if !immIFits(imm, 32) { + return 0, 0, fmt.Errorf("immediate does not fit in 32-bits: %d", imm) + } + + // Nothing special needs to be done if the immediate fits in 12-bits. + if immIFits(imm, 12) { + return imm, 0, nil + } + + high = imm >> 12 + + // The bottom 12 bits will be treated as signed. + // + // If that will result in a negative 12 bit number, add 1 to + // our upper bits to adjust for the borrow. + // + // It is not possible for this increment to overflow. To + // overflow, the 20 top bits would be 1, and the sign bit for + // the low 12 bits would be set, in which case the entire 32 + // bit pattern fits in a 12 bit signed value. + if imm&(1<<11) != 0 { + high++ + } + + low = signExtend(imm, 12) + high = signExtend(high, 20) + + return low, high, nil +} + +func regVal(r, min, max uint32) uint32 { + if r < min || r > max { + panic(fmt.Sprintf("register out of range, want %d < %d < %d", min, r, max)) + } + return r - min +} + +// regI returns an integer register. +func regI(r uint32) uint32 { + return regVal(r, REG_X0, REG_X31) +} + +// regF returns a float register. +func regF(r uint32) uint32 { + return regVal(r, REG_F0, REG_F31) +} + +// regAddr extracts a register from an Addr. +func regAddr(a obj.Addr, min, max uint32) uint32 { + if a.Type != obj.TYPE_REG { + panic(fmt.Sprintf("ill typed: %+v", a)) + } + return regVal(uint32(a.Reg), min, max) +} + +// regIAddr extracts the integer register from an Addr. +func regIAddr(a obj.Addr) uint32 { + return regAddr(a, REG_X0, REG_X31) +} + +// regFAddr extracts the float register from an Addr. +func regFAddr(a obj.Addr) uint32 { + return regAddr(a, REG_F0, REG_F31) +} + +// immIFits reports whether immediate value x fits in nbits bits +// as a signed integer. +func immIFits(x int64, nbits uint) bool { + nbits-- + var min int64 = -1 << nbits + var max int64 = 1<<nbits - 1 + return min <= x && x <= max +} + +// immI extracts the signed integer of the specified size from an immediate. +func immI(as obj.As, imm int64, nbits uint) uint32 { + if !immIFits(imm, nbits) { + panic(fmt.Sprintf("%v\tsigned immediate %d cannot fit in %d bits", as, imm, nbits)) + } + return uint32(imm) +} + +func wantImmI(ctxt *obj.Link, as obj.As, imm int64, nbits uint) { + if !immIFits(imm, nbits) { + ctxt.Diag("%v\tsigned immediate cannot be larger than %d bits but got %d", as, nbits, imm) + } +} + +func wantReg(ctxt *obj.Link, as obj.As, pos string, descr string, r, min, max uint32) { + if r < min || r > max { + var suffix string + if r != obj.REG_NONE { + suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r))) + } + ctxt.Diag("%v\texpected %s register in %s position%s", as, descr, pos, suffix) + } +} + +func wantNoneReg(ctxt *obj.Link, as obj.As, pos string, r uint32) { + if r != obj.REG_NONE { + ctxt.Diag("%v\texpected no register in %s but got register %s", as, pos, RegName(int(r))) + } +} + +// wantIntReg checks that r is an integer register. +func wantIntReg(ctxt *obj.Link, as obj.As, pos string, r uint32) { + wantReg(ctxt, as, pos, "integer", r, REG_X0, REG_X31) +} + +// wantFloatReg checks that r is a floating-point register. +func wantFloatReg(ctxt *obj.Link, as obj.As, pos string, r uint32) { + wantReg(ctxt, as, pos, "float", r, REG_F0, REG_F31) +} + +// wantEvenOffset checks that the offset is a multiple of two. +func wantEvenOffset(ctxt *obj.Link, as obj.As, offset int64) { + if offset%1 != 0 { + ctxt.Diag("%v\tjump offset %v must be even", as, offset) + } +} + +func validateRIII(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + wantIntReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateRFFF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateRFFI(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateRFI(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateRIF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantIntReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateRFF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantFloatReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateII(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) +} + +func validateIF(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantFloatReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) +} + +func validateSI(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) +} + +func validateSF(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 12) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantFloatReg(ctxt, ins.as, "rs1", ins.rs1) +} + +func validateB(ctxt *obj.Link, ins *instruction) { + // Offsets are multiples of two, so accept 13 bit immediates for the + // 12 bit slot. We implicitly drop the least significant bit in encodeB. + wantEvenOffset(ctxt, ins.as, ins.imm) + wantImmI(ctxt, ins.as, ins.imm, 13) + wantNoneReg(ctxt, ins.as, "rd", ins.rd) + wantIntReg(ctxt, ins.as, "rs1", ins.rs1) + wantIntReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateU(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins.as, ins.imm, 20) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateJ(ctxt *obj.Link, ins *instruction) { + // Offsets are multiples of two, so accept 21 bit immediates for the + // 20 bit slot. We implicitly drop the least significant bit in encodeJ. + wantEvenOffset(ctxt, ins.as, ins.imm) + wantImmI(ctxt, ins.as, ins.imm, 21) + wantIntReg(ctxt, ins.as, "rd", ins.rd) + wantNoneReg(ctxt, ins.as, "rs1", ins.rs1) + wantNoneReg(ctxt, ins.as, "rs2", ins.rs2) +} + +func validateRaw(ctxt *obj.Link, ins *instruction) { + // Treat the raw value specially as a 32-bit unsigned integer. + // Nobody wants to enter negative machine code. + if ins.imm < 0 || 1<<32 <= ins.imm { + ctxt.Diag("%v\timmediate in raw position cannot be larger than 32 bits but got %d", ins.as, ins.imm) + } +} + +// encodeR encodes an R-type RISC-V instruction. +func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeR: could not encode instruction") + } + if enc.rs2 != 0 && rs2 != 0 { + panic("encodeR: instruction uses rs2, but rs2 was nonzero") + } + return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode +} + +func encodeRIII(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFFF(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFFI(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFI(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRIF(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) +} + +func encodeRFF(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) +} + +// encodeI encodes an I-type RISC-V instruction. +func encodeI(as obj.As, rs1, rd, imm uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeI: could not encode instruction") + } + imm |= uint32(enc.csr) + return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode +} + +func encodeII(ins *instruction) uint32 { + return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm)) +} + +func encodeIF(ins *instruction) uint32 { + return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm)) +} + +// encodeS encodes an S-type RISC-V instruction. +func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeS: could not encode instruction") + } + return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode +} + +func encodeSI(ins *instruction) uint32 { + return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm)) +} + +func encodeSF(ins *instruction) uint32 { + return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm)) +} + +// encodeB encodes a B-type RISC-V instruction. +func encodeB(ins *instruction) uint32 { + imm := immI(ins.as, ins.imm, 13) + rs2 := regI(ins.rs1) + rs1 := regI(ins.rs2) + enc := encode(ins.as) + if enc == nil { + panic("encodeB: could not encode instruction") + } + return (imm>>12)<<31 | ((imm>>5)&0x3f)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | ((imm>>1)&0xf)<<8 | ((imm>>11)&0x1)<<7 | enc.opcode +} + +// encodeU encodes a U-type RISC-V instruction. +func encodeU(ins *instruction) uint32 { + // The immediates for encodeU are the upper 20 bits of a 32 bit value. + // Rather than have the user/compiler generate a 32 bit constant, the + // bottommost bits of which must all be zero, instead accept just the + // top bits. + imm := immI(ins.as, ins.imm, 20) + rd := regI(ins.rd) + enc := encode(ins.as) + if enc == nil { + panic("encodeU: could not encode instruction") + } + return imm<<12 | rd<<7 | enc.opcode +} + +// encodeJ encodes a J-type RISC-V instruction. +func encodeJ(ins *instruction) uint32 { + imm := immI(ins.as, ins.imm, 21) + rd := regI(ins.rd) + enc := encode(ins.as) + if enc == nil { + panic("encodeJ: could not encode instruction") + } + return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12 | rd<<7 | enc.opcode +} + +func encodeRawIns(ins *instruction) uint32 { + // Treat the raw value specially as a 32-bit unsigned integer. + // Nobody wants to enter negative machine code. + if ins.imm < 0 || 1<<32 <= ins.imm { + panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm)) + } + return uint32(ins.imm) +} + +func EncodeIImmediate(imm int64) (int64, error) { + if !immIFits(imm, 12) { + return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm) + } + return imm << 20, nil +} + +func EncodeSImmediate(imm int64) (int64, error) { + if !immIFits(imm, 12) { + return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm) + } + return ((imm >> 5) << 25) | ((imm & 0x1f) << 7), nil +} + +func EncodeUImmediate(imm int64) (int64, error) { + if !immIFits(imm, 20) { + return 0, fmt.Errorf("immediate %#x does not fit in 20 bits", imm) + } + return imm << 12, nil +} + +type encoding struct { + encode func(*instruction) uint32 // encode returns the machine code for an instruction + validate func(*obj.Link, *instruction) // validate validates an instruction + length int // length of encoded instruction; 0 for pseudo-ops, 4 otherwise +} + +var ( + // Encodings have the following naming convention: + // + // 1. the instruction encoding (R/I/S/B/U/J), in lowercase + // 2. zero or more register operand identifiers (I = integer + // register, F = float register), in uppercase + // 3. the word "Encoding" + // + // For example, rIIIEncoding indicates an R-type instruction with two + // integer register inputs and an integer register output; sFEncoding + // indicates an S-type instruction with rs2 being a float register. + + rIIIEncoding = encoding{encode: encodeRIII, validate: validateRIII, length: 4} + rFFFEncoding = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4} + rFFIEncoding = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4} + rFIEncoding = encoding{encode: encodeRFI, validate: validateRFI, length: 4} + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} + + iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4} + iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4} + + sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4} + sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4} + + bEncoding = encoding{encode: encodeB, validate: validateB, length: 4} + uEncoding = encoding{encode: encodeU, validate: validateU, length: 4} + jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4} + + // rawEncoding encodes a raw instruction byte sequence. + rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4} + + // pseudoOpEncoding panics if encoding is attempted, but does no validation. + pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0} + + // badEncoding is used when an invalid op is encountered. + // An error has already been generated, so let anything else through. + badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0} +) + +// encodings contains the encodings for RISC-V instructions. +// Instructions are masked with obj.AMask to keep indices small. +var encodings = [ALAST & obj.AMask]encoding{ + + // Unprivileged ISA + + // 2.4: Integer Computational Instructions + AADDI & obj.AMask: iIEncoding, + ASLTI & obj.AMask: iIEncoding, + ASLTIU & obj.AMask: iIEncoding, + AANDI & obj.AMask: iIEncoding, + AORI & obj.AMask: iIEncoding, + AXORI & obj.AMask: iIEncoding, + ASLLI & obj.AMask: iIEncoding, + ASRLI & obj.AMask: iIEncoding, + ASRAI & obj.AMask: iIEncoding, + ALUI & obj.AMask: uEncoding, + AAUIPC & obj.AMask: uEncoding, + AADD & obj.AMask: rIIIEncoding, + ASLT & obj.AMask: rIIIEncoding, + ASLTU & obj.AMask: rIIIEncoding, + AAND & obj.AMask: rIIIEncoding, + AOR & obj.AMask: rIIIEncoding, + AXOR & obj.AMask: rIIIEncoding, + ASLL & obj.AMask: rIIIEncoding, + ASRL & obj.AMask: rIIIEncoding, + ASUB & obj.AMask: rIIIEncoding, + ASRA & obj.AMask: rIIIEncoding, + + // 2.5: Control Transfer Instructions + AJAL & obj.AMask: jEncoding, + AJALR & obj.AMask: iIEncoding, + ABEQ & obj.AMask: bEncoding, + ABNE & obj.AMask: bEncoding, + ABLT & obj.AMask: bEncoding, + ABLTU & obj.AMask: bEncoding, + ABGE & obj.AMask: bEncoding, + ABGEU & obj.AMask: bEncoding, + + // 2.6: Load and Store Instructions + ALW & obj.AMask: iIEncoding, + ALWU & obj.AMask: iIEncoding, + ALH & obj.AMask: iIEncoding, + ALHU & obj.AMask: iIEncoding, + ALB & obj.AMask: iIEncoding, + ALBU & obj.AMask: iIEncoding, + ASW & obj.AMask: sIEncoding, + ASH & obj.AMask: sIEncoding, + ASB & obj.AMask: sIEncoding, + + // 2.7: Memory Ordering + AFENCE & obj.AMask: iIEncoding, + + // 5.2: Integer Computational Instructions (RV64I) + AADDIW & obj.AMask: iIEncoding, + ASLLIW & obj.AMask: iIEncoding, + ASRLIW & obj.AMask: iIEncoding, + ASRAIW & obj.AMask: iIEncoding, + AADDW & obj.AMask: rIIIEncoding, + ASLLW & obj.AMask: rIIIEncoding, + ASRLW & obj.AMask: rIIIEncoding, + ASUBW & obj.AMask: rIIIEncoding, + ASRAW & obj.AMask: rIIIEncoding, + + // 5.3: Load and Store Instructions (RV64I) + ALD & obj.AMask: iIEncoding, + ASD & obj.AMask: sIEncoding, + + // 7.1: Multiplication Operations + AMUL & obj.AMask: rIIIEncoding, + AMULH & obj.AMask: rIIIEncoding, + AMULHU & obj.AMask: rIIIEncoding, + AMULHSU & obj.AMask: rIIIEncoding, + AMULW & obj.AMask: rIIIEncoding, + ADIV & obj.AMask: rIIIEncoding, + ADIVU & obj.AMask: rIIIEncoding, + AREM & obj.AMask: rIIIEncoding, + AREMU & obj.AMask: rIIIEncoding, + ADIVW & obj.AMask: rIIIEncoding, + ADIVUW & obj.AMask: rIIIEncoding, + AREMW & obj.AMask: rIIIEncoding, + AREMUW & obj.AMask: rIIIEncoding, + + // 8.2: Load-Reserved/Store-Conditional + ALRW & obj.AMask: rIIIEncoding, + ALRD & obj.AMask: rIIIEncoding, + ASCW & obj.AMask: rIIIEncoding, + ASCD & obj.AMask: rIIIEncoding, + + // 8.3: Atomic Memory Operations + AAMOSWAPW & obj.AMask: rIIIEncoding, + AAMOSWAPD & obj.AMask: rIIIEncoding, + AAMOADDW & obj.AMask: rIIIEncoding, + AAMOADDD & obj.AMask: rIIIEncoding, + AAMOANDW & obj.AMask: rIIIEncoding, + AAMOANDD & obj.AMask: rIIIEncoding, + AAMOORW & obj.AMask: rIIIEncoding, + AAMOORD & obj.AMask: rIIIEncoding, + AAMOXORW & obj.AMask: rIIIEncoding, + AAMOXORD & obj.AMask: rIIIEncoding, + AAMOMAXW & obj.AMask: rIIIEncoding, + AAMOMAXD & obj.AMask: rIIIEncoding, + AAMOMAXUW & obj.AMask: rIIIEncoding, + AAMOMAXUD & obj.AMask: rIIIEncoding, + AAMOMINW & obj.AMask: rIIIEncoding, + AAMOMIND & obj.AMask: rIIIEncoding, + AAMOMINUW & obj.AMask: rIIIEncoding, + AAMOMINUD & obj.AMask: rIIIEncoding, + + // 10.1: Base Counters and Timers + ARDCYCLE & obj.AMask: iIEncoding, + ARDTIME & obj.AMask: iIEncoding, + ARDINSTRET & obj.AMask: iIEncoding, + + // 11.5: Single-Precision Load and Store Instructions + AFLW & obj.AMask: iFEncoding, + AFSW & obj.AMask: sFEncoding, + + // 11.6: Single-Precision Floating-Point Computational Instructions + AFADDS & obj.AMask: rFFFEncoding, + AFSUBS & obj.AMask: rFFFEncoding, + AFMULS & obj.AMask: rFFFEncoding, + AFDIVS & obj.AMask: rFFFEncoding, + AFMINS & obj.AMask: rFFFEncoding, + AFMAXS & obj.AMask: rFFFEncoding, + AFSQRTS & obj.AMask: rFFFEncoding, + + // 11.7: Single-Precision Floating-Point Conversion and Move Instructions + AFCVTWS & obj.AMask: rFIEncoding, + AFCVTLS & obj.AMask: rFIEncoding, + AFCVTSW & obj.AMask: rIFEncoding, + AFCVTSL & obj.AMask: rIFEncoding, + AFCVTWUS & obj.AMask: rFIEncoding, + AFCVTLUS & obj.AMask: rFIEncoding, + AFCVTSWU & obj.AMask: rIFEncoding, + AFCVTSLU & obj.AMask: rIFEncoding, + AFSGNJS & obj.AMask: rFFFEncoding, + AFSGNJNS & obj.AMask: rFFFEncoding, + AFSGNJXS & obj.AMask: rFFFEncoding, + AFMVXS & obj.AMask: rFIEncoding, + AFMVSX & obj.AMask: rIFEncoding, + AFMVXW & obj.AMask: rFIEncoding, + AFMVWX & obj.AMask: rIFEncoding, + + // 11.8: Single-Precision Floating-Point Compare Instructions + AFEQS & obj.AMask: rFFIEncoding, + AFLTS & obj.AMask: rFFIEncoding, + AFLES & obj.AMask: rFFIEncoding, + + // 11.9: Single-Precision Floating-Point Classify Instruction + AFCLASSS & obj.AMask: rFIEncoding, + + // 12.3: Double-Precision Load and Store Instructions + AFLD & obj.AMask: iFEncoding, + AFSD & obj.AMask: sFEncoding, + + // 12.4: Double-Precision Floating-Point Computational Instructions + AFADDD & obj.AMask: rFFFEncoding, + AFSUBD & obj.AMask: rFFFEncoding, + AFMULD & obj.AMask: rFFFEncoding, + AFDIVD & obj.AMask: rFFFEncoding, + AFMIND & obj.AMask: rFFFEncoding, + AFMAXD & obj.AMask: rFFFEncoding, + AFSQRTD & obj.AMask: rFFFEncoding, + + // 12.5: Double-Precision Floating-Point Conversion and Move Instructions + AFCVTWD & obj.AMask: rFIEncoding, + AFCVTLD & obj.AMask: rFIEncoding, + AFCVTDW & obj.AMask: rIFEncoding, + AFCVTDL & obj.AMask: rIFEncoding, + AFCVTWUD & obj.AMask: rFIEncoding, + AFCVTLUD & obj.AMask: rFIEncoding, + AFCVTDWU & obj.AMask: rIFEncoding, + AFCVTDLU & obj.AMask: rIFEncoding, + AFCVTSD & obj.AMask: rFFEncoding, + AFCVTDS & obj.AMask: rFFEncoding, + AFSGNJD & obj.AMask: rFFFEncoding, + AFSGNJND & obj.AMask: rFFFEncoding, + AFSGNJXD & obj.AMask: rFFFEncoding, + AFMVXD & obj.AMask: rFIEncoding, + AFMVDX & obj.AMask: rIFEncoding, + + // 12.6: Double-Precision Floating-Point Compare Instructions + AFEQD & obj.AMask: rFFIEncoding, + AFLTD & obj.AMask: rFFIEncoding, + AFLED & obj.AMask: rFFIEncoding, + + // 12.7: Double-Precision Floating-Point Classify Instruction + AFCLASSD & obj.AMask: rFIEncoding, + + // Privileged ISA + + // 3.2.1: Environment Call and Breakpoint + AECALL & obj.AMask: iIEncoding, + AEBREAK & obj.AMask: iIEncoding, + + // Escape hatch + AWORD & obj.AMask: rawEncoding, + + // Pseudo-operations + obj.AFUNCDATA: pseudoOpEncoding, + obj.APCDATA: pseudoOpEncoding, + obj.ATEXT: pseudoOpEncoding, + obj.ANOP: pseudoOpEncoding, + obj.ADUFFZERO: pseudoOpEncoding, + obj.ADUFFCOPY: pseudoOpEncoding, +} + +// encodingForAs returns the encoding for an obj.As. +func encodingForAs(as obj.As) (encoding, error) { + if base := as &^ obj.AMask; base != obj.ABaseRISCV && base != 0 { + return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as) + } + asi := as & obj.AMask + if int(asi) >= len(encodings) { + return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as) + } + enc := encodings[asi] + if enc.validate == nil { + return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as) + } + return enc, nil +} + +type instruction struct { + as obj.As // Assembler opcode + rd uint32 // Destination register + rs1 uint32 // Source register 1 + rs2 uint32 // Source register 2 + imm int64 // Immediate + funct3 uint32 // Function 3 + funct7 uint32 // Function 7 +} + +func (ins *instruction) encode() (uint32, error) { + enc, err := encodingForAs(ins.as) + if err != nil { + return 0, err + } + if enc.length > 0 { + return enc.encode(ins), nil + } + return 0, fmt.Errorf("fixme") +} + +func (ins *instruction) length() int { + enc, err := encodingForAs(ins.as) + if err != nil { + return 0 + } + return enc.length +} + +func (ins *instruction) validate(ctxt *obj.Link) { + enc, err := encodingForAs(ins.as) + if err != nil { + ctxt.Diag(err.Error()) + return + } + enc.validate(ctxt, ins) +} + +// instructionsForProg returns the machine instructions for an *obj.Prog. +func instructionsForProg(p *obj.Prog) []*instruction { + ins := &instruction{ + as: p.As, + rd: uint32(p.To.Reg), + rs1: uint32(p.Reg), + rs2: uint32(p.From.Reg), + imm: p.From.Offset, + } + + inss := []*instruction{ins} + switch ins.as { + case AJAL, AJALR: + ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE + ins.imm = p.To.Offset + + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: + switch ins.as { + case ABEQZ: + ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg) + case ABGEZ: + ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg) + case ABGT: + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg) + case ABGTU: + ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg) + case ABGTZ: + ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO + case ABLE: + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg) + case ABLEU: + ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg) + case ABLEZ: + ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO + case ABLTZ: + ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg) + case ABNEZ: + ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg) + } + ins.imm = p.To.Offset + + case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: + // Handle register to register moves. + if p.From.Type != obj.TYPE_REG || p.To.Type != obj.TYPE_REG { + break + } + switch p.As { + case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0 + case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb + ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255 + case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg) + case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb + ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg) + case AMOVB, AMOVH: + // Use SLLI/SRAI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVB { + ins.imm = 56 + } else if p.As == AMOVH { + ins.imm = 48 + } + ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + case AMOVHU, AMOVWU: + // Use SLLI/SRLI to extend. + ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE + if p.As == AMOVHU { + ins.imm = 48 + } else if p.As == AMOVWU { + ins.imm = 32 + } + ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm} + inss = append(inss, ins2) + } + + case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD: + if p.From.Type != obj.TYPE_MEM { + p.Ctxt.Diag("%v requires memory for source", p) + return nil + } + ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE + ins.imm = p.From.Offset + + case ASW, ASH, ASB, ASD, AFSW, AFSD: + if p.To.Type != obj.TYPE_MEM { + p.Ctxt.Diag("%v requires memory for destination", p) + return nil + } + ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE + ins.imm = p.To.Offset + + case ALRW, ALRD: + // Set aq to use acquire access ordering, which matches Go's memory requirements. + ins.funct7 = 2 + ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO + + case ASCW, ASCD, AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD, + AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD: + // Set aq to use acquire access ordering, which matches Go's memory requirements. + ins.funct7 = 2 + ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg) + + case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET: + insEnc := encode(p.As) + if p.To.Type == obj.TYPE_NONE { + ins.rd = REG_ZERO + } + ins.rs1 = REG_ZERO + ins.imm = insEnc.csr + + case AFENCE: + ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE + ins.imm = 0x0ff + + case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD: + // Set the rounding mode in funct3 to round to zero. + ins.funct3 = 1 + + case AFNES, AFNED: + // Replace FNE[SD] with FEQ[SD] and NOT. + if p.To.Type != obj.TYPE_REG { + p.Ctxt.Diag("%v needs an integer register output", ins.as) + return nil + } + if ins.as == AFNES { + ins.as = AFEQS + } else { + ins.as = AFEQD + } + ins2 := &instruction{ + as: AXORI, // [bit] xor 1 = not [bit] + rd: ins.rd, + rs1: ins.rd, + imm: 1, + } + inss = append(inss, ins2) + + case AFSQRTS, AFSQRTD: + // These instructions expect a zero (i.e. float register 0) + // to be the second input operand. + ins.rs1 = uint32(p.From.Reg) + ins.rs2 = REG_F0 + + case ANEG, ANEGW: + // NEG rs, rd -> SUB rs, X0, rd + ins.as = ASUB + if p.As == ANEGW { + ins.as = ASUBW + } + ins.rs1 = REG_ZERO + if ins.rd == obj.REG_NONE { + ins.rd = ins.rs2 + } + + case ANOT: + // NOT rs, rd -> XORI $-1, rs, rd + ins.as = AXORI + ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE + if ins.rd == obj.REG_NONE { + ins.rd = ins.rs1 + } + ins.imm = -1 + + case ASEQZ: + // SEQZ rs, rd -> SLTIU $1, rs, rd + ins.as = ASLTIU + ins.rs1 = uint32(p.From.Reg) + ins.imm = 1 + + case ASNEZ: + // SNEZ rs, rd -> SLTU rs, x0, rd + ins.as = ASLTU + ins.rs1 = REG_ZERO + + case AFNEGS: + // FNEGS rs, rd -> FSGNJNS rs, rs, rd + ins.as = AFSGNJNS + ins.rs1 = uint32(p.From.Reg) + + case AFNEGD: + // FNEGD rs, rd -> FSGNJND rs, rs, rd + ins.as = AFSGNJND + ins.rs1 = uint32(p.From.Reg) + } + return inss +} + +// assemble emits machine code. +// It is called at the very end of the assembly process. +func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if ctxt.Retpoline { + ctxt.Diag("-spectre=ret not supported on riscv") + ctxt.Retpoline = false // don't keep printing + } + + var symcode []uint32 + for p := cursym.Func().Text; p != nil; p = p.Link { + switch p.As { + case AJALR: + if p.To.Sym != nil { + // This is a CALL/JMP. We add a relocation only + // for linker stack checking. No actual + // relocation is needed. + rel := obj.Addrel(cursym) + rel.Off = int32(p.Pc) + rel.Siz = 4 + rel.Sym = p.To.Sym + rel.Add = p.To.Offset + rel.Type = objabi.R_CALLRISCV + } + case AAUIPC: + var rt objabi.RelocType + if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC { + rt = objabi.R_RISCV_PCREL_ITYPE + } else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC { + rt = objabi.R_RISCV_PCREL_STYPE + } else { + break + } + if p.Link == nil { + ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction") + break + } + addr := p.RestArgs[0] + if addr.Sym == nil { + ctxt.Diag("AUIPC needing PC-relative reloc missing symbol") + break + } + if addr.Sym.Type == objabi.STLSBSS { + if rt == objabi.R_RISCV_PCREL_ITYPE { + rt = objabi.R_RISCV_TLS_IE_ITYPE + } else if rt == objabi.R_RISCV_PCREL_STYPE { + rt = objabi.R_RISCV_TLS_IE_STYPE + } + } + + rel := obj.Addrel(cursym) + rel.Off = int32(p.Pc) + rel.Siz = 8 + rel.Sym = addr.Sym + rel.Add = addr.Offset + rel.Type = rt + } + + for _, ins := range instructionsForProg(p) { + ic, err := ins.encode() + if err == nil { + symcode = append(symcode, ic) + } + } + } + cursym.Size = int64(4 * len(symcode)) + + cursym.Grow(cursym.Size) + for p, i := cursym.P, 0; i < len(symcode); p, i = p[4:], i+1 { + ctxt.Arch.ByteOrder.PutUint32(p, symcode[i]) + } + + obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil) +} + +func isUnsafePoint(p *obj.Prog) bool { + return p.From.Reg == REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP +} + +var LinkRISCV64 = obj.LinkArch{ + Arch: sys.ArchRISCV64, + Init: buildop, + Preprocess: preprocess, + Assemble: assemble, + Progedit: progedit, + UnaryDst: unaryDst, + DWARFRegisters: RISCV64DWARFRegisters, +} |