Diffstat (limited to 'src/cmd/internal/obj/x86/obj6.go')
-rw-r--r--  src/cmd/internal/obj/x86/obj6.go  1261
1 file changed, 1261 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go
new file mode 100644
index 0000000..184fb43
--- /dev/null
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -0,0 +1,1261 @@
+// Inferno utils/6l/pass.c
+// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+// Portions Copyright © 1997-1999 Vita Nuova Limited
+// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
+// Portions Copyright © 2004,2006 Bruce Ellis
+// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+// Portions Copyright © 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package x86
+
+import (
+ "cmd/internal/obj"
+ "cmd/internal/objabi"
+ "cmd/internal/src"
+ "cmd/internal/sys"
+ "math"
+ "strings"
+)
+
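+// CanUse1InsnTLS reports whether the target system can use the
+// 1-instruction TLS access form (e.g. MOVQ 0(TLS), CX) described in
+// the long comment in progedit: for example, it is true on linux/amd64
+// without -shared and always false on Windows and Plan 9.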
+func CanUse1InsnTLS(ctxt *obj.Link) bool {
+ if isAndroid {
+ // Android uses a global variable for the tls offset.
+ return false
+ }
+
+ if ctxt.Arch.Family == sys.I386 {
+ switch ctxt.Headtype {
+ case objabi.Hlinux,
+ objabi.Hplan9,
+ objabi.Hwindows:
+ return false
+ }
+
+ return true
+ }
+
+ switch ctxt.Headtype {
+ case objabi.Hplan9, objabi.Hwindows:
+ return false
+ case objabi.Hlinux, objabi.Hfreebsd:
+ return !ctxt.Flag_shared
+ }
+
+ return true
+}
+
+func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
+ // Thread-local storage references use the TLS pseudo-register.
+ // As a register, TLS refers to the thread-local storage base, and it
+ // can only be loaded into another register:
+ //
+ // MOVQ TLS, AX
+ //
+ // An offset from the thread-local storage base is written off(reg)(TLS*1).
+ // Semantically it is off(reg), but the (TLS*1) annotation marks this as
+ // indexing from the loaded TLS base. This emits a relocation so that
+ // if the linker needs to adjust the offset, it can. For example:
+ //
+ // MOVQ TLS, AX
+ // MOVQ 0(AX)(TLS*1), CX // load g into CX
+ //
+ // On systems that support direct access to the TLS memory, this
+ // pair of instructions can be reduced to a direct TLS memory reference:
+ //
+ // MOVQ 0(TLS), CX // load g into CX
+ //
+ // The 2-instruction and 1-instruction forms correspond to the two code
+ // sequences for loading a TLS variable in the local exec model given in "ELF
+ // Handling For Thread-Local Storage".
+ //
+ // We apply this rewrite on systems that support the 1-instruction form.
+ // The decision is made using only the operating system and the -shared flag,
+ // not the link mode. If some link modes on a particular operating system
+ // require the 2-instruction form, then all builds for that operating system
+ // will use the 2-instruction form, so that the link mode decision can be
+ // delayed to link time.
+ //
+ // In this way, all supported systems use identical instructions to
+ // access TLS, and they are rewritten appropriately first here in
+ // liblink and then finally using relocations in the linker.
+ //
+ // When -shared is passed, we leave the code in the 2-instruction form but
+ // assemble (and relocate) them in different ways to generate the initial
+ // exec code sequence. It's a bit of a fluke that this is possible without
+ // rewriting the instructions more comprehensively, and it only works because
+ // we only support a single TLS variable (g).
+
+ if CanUse1InsnTLS(ctxt) {
+ // Reduce 2-instruction sequence to 1-instruction sequence.
+ // Sequences like
+ // MOVQ TLS, BX
+ // ... off(BX)(TLS*1) ...
+ // become
+ // NOP
+ // ... off(TLS) ...
+ //
+ // TODO(rsc): Remove the Hsolaris special case. It exists only to
+ // guarantee we produce binaries byte-identical to those built before this code.
+ // But it should be unnecessary.
+ if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
+ obj.Nopout(p)
+ }
+ if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
+ p.From.Reg = REG_TLS
+ p.From.Scale = 0
+ p.From.Index = REG_NONE
+ }
+
+ if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
+ p.To.Reg = REG_TLS
+ p.To.Scale = 0
+ p.To.Index = REG_NONE
+ }
+ } else {
+ // load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
+ // as the 2-instruction sequence if necessary.
+ // MOVQ 0(TLS), BX
+ // becomes
+ // MOVQ TLS, BX
+ // MOVQ 0(BX)(TLS*1), BX
+ if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
+ q := obj.Appendp(p, newprog)
+ q.As = p.As
+ q.From = p.From
+ q.From.Type = obj.TYPE_MEM
+ q.From.Reg = p.To.Reg
+ q.From.Index = REG_TLS
+ q.From.Scale = 2 // TODO: use 1
+ q.To = p.To
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_TLS
+ p.From.Index = REG_NONE
+ p.From.Offset = 0
+ }
+ }
+
+ // Android uses a tls offset determined at runtime. Rewrite
+ // MOVQ TLS, BX
+ // to
+ // MOVQ runtime.tls_g(SB), BX
+ if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_EXTERN
+ p.From.Reg = REG_NONE
+ p.From.Sym = ctxt.Lookup("runtime.tls_g")
+ p.From.Index = REG_NONE
+ }
+
+ // TODO: Remove.
+ if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
+ if p.From.Scale == 1 && p.From.Index == REG_TLS {
+ p.From.Scale = 2
+ }
+ if p.To.Scale == 1 && p.To.Index == REG_TLS {
+ p.To.Scale = 2
+ }
+ }
+
+ // Rewrite 0 to $0 in 3rd argument to CMPPS etc.
+ // That's what the tables expect.
+ switch p.As {
+ case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
+ if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
+ p.To.Type = obj.TYPE_CONST
+ }
+ }
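+
+ // For example, a trailing predicate written as a bare 4 rather than $4
+ // parses as a memory operand (offset 4 with no base, name, or index);
+ // the rewrite above reinterprets it as the constant the tables expect.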
+
+ // Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
+ switch p.As {
+ case obj.ACALL, obj.AJMP, obj.ARET:
+ if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
+ p.To.Type = obj.TYPE_BRANCH
+ }
+ }
+
+ // Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
+ if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
+ switch p.As {
+ case AMOVL:
+ p.As = ALEAL
+ p.From.Type = obj.TYPE_MEM
+ case AMOVQ:
+ p.As = ALEAQ
+ p.From.Type = obj.TYPE_MEM
+ }
+ }
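+
+ // For example, MOVQ $off(SP), AX (materialize the address) becomes
+ // LEAQ off(SP), AX.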
+
+ // Rewrite float constants to values stored in memory.
+ switch p.As {
+ // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
+ case AMOVSS:
+ if p.From.Type == obj.TYPE_FCONST {
+ // f == 0 can't be used here due to -0, so use Float64bits
+ if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
+ if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
+ p.As = AXORPS
+ p.From = p.To
+ break
+ }
+ }
+ }
+ fallthrough
+
+ case AFMOVF,
+ AFADDF,
+ AFSUBF,
+ AFSUBRF,
+ AFMULF,
+ AFDIVF,
+ AFDIVRF,
+ AFCOMF,
+ AFCOMFP,
+ AADDSS,
+ ASUBSS,
+ AMULSS,
+ ADIVSS,
+ ACOMISS,
+ AUCOMISS:
+ if p.From.Type == obj.TYPE_FCONST {
+ f32 := float32(p.From.Val.(float64))
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_EXTERN
+ p.From.Sym = ctxt.Float32Sym(f32)
+ p.From.Offset = 0
+ }
+
+ case AMOVSD:
+ // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
+ if p.From.Type == obj.TYPE_FCONST {
+ // f == 0 can't be used here due to -0, so use Float64bits
+ if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
+ if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
+ p.As = AXORPS
+ p.From = p.To
+ break
+ }
+ }
+ }
+ fallthrough
+
+ case AFMOVD,
+ AFADDD,
+ AFSUBD,
+ AFSUBRD,
+ AFMULD,
+ AFDIVD,
+ AFDIVRD,
+ AFCOMD,
+ AFCOMDP,
+ AADDSD,
+ ASUBSD,
+ AMULSD,
+ ADIVSD,
+ ACOMISD,
+ AUCOMISD:
+ if p.From.Type == obj.TYPE_FCONST {
+ f64 := p.From.Val.(float64)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_EXTERN
+ p.From.Sym = ctxt.Float64Sym(f64)
+ p.From.Offset = 0
+ }
+ }
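+
+ // For example, MOVSD $(0.25), X0 above becomes a load of an 8-byte
+ // constant materialized in memory by Float64Sym (MOVSD const(SB), X0),
+ // while MOVSD $(0.0), X0 becomes XORPS X0, X0.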
+
+ if ctxt.Flag_dynlink {
+ rewriteToUseGot(ctxt, p, newprog)
+ }
+
+ if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
+ rewriteToPcrel(ctxt, p, newprog)
+ }
+}
+
+// Rewrite p, if necessary, to access global data via the global offset table.
+func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
+ var lea, mov obj.As
+ var reg int16
+ if ctxt.Arch.Family == sys.AMD64 {
+ lea = ALEAQ
+ mov = AMOVQ
+ reg = REG_R15
+ } else {
+ lea = ALEAL
+ mov = AMOVL
+ reg = REG_CX
+ if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
+ // Special case: clobber the destination register with
+ // the PC so we don't have to clobber CX.
+ // The SSA backend depends on CX not being clobbered across LEAL.
+ // See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
+ reg = p.To.Reg
+ }
+ }
+
+ if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
+ // ADUFFxxx $offset
+ // becomes
+ // $MOV runtime.duffxxx@GOT, $reg
+ // $LEA $offset($reg), $reg
+ // CALL $reg
+ // (we use LEAx rather than ADDx because ADDx clobbers
+ // flags and duffzero on 386 does not otherwise do so).
+ var sym *obj.LSym
+ if p.As == obj.ADUFFZERO {
+ sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
+ } else {
+ sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
+ }
+ offset := p.To.Offset
+ p.As = mov
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_GOTREF
+ p.From.Sym = sym
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = reg
+ p.To.Offset = 0
+ p.To.Sym = nil
+ p1 := obj.Appendp(p, newprog)
+ p1.As = lea
+ p1.From.Type = obj.TYPE_MEM
+ p1.From.Offset = offset
+ p1.From.Reg = reg
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = reg
+ p2 := obj.Appendp(p1, newprog)
+ p2.As = obj.ACALL
+ p2.To.Type = obj.TYPE_REG
+ p2.To.Reg = reg
+ }
+
+ // We only care about global data: NAME_EXTERN means a global
+ // symbol in the Go sense, and p.Sym.Local is true for a few
+ // internally defined symbols.
+ if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
+ // $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
+ p.As = mov
+ p.From.Type = obj.TYPE_ADDR
+ }
+ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
+ // $MOV $sym, Rx becomes $MOV sym@GOT, Rx
+ // $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
+ // On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
+ cmplxdest := false
+ pAs := p.As
+ var dest obj.Addr
+ if p.To.Type != obj.TYPE_REG || pAs != mov {
+ if ctxt.Arch.Family == sys.AMD64 {
+ ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
+ }
+ cmplxdest = true
+ dest = p.To
+ p.As = mov
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = reg
+ p.To.Sym = nil
+ p.To.Name = obj.NAME_NONE
+ }
+ p.From.Type = obj.TYPE_MEM
+ p.From.Name = obj.NAME_GOTREF
+ q := p
+ if p.From.Offset != 0 {
+ q = obj.Appendp(p, newprog)
+ q.As = lea
+ q.From.Type = obj.TYPE_MEM
+ q.From.Reg = p.To.Reg
+ q.From.Offset = p.From.Offset
+ q.To = p.To
+ p.From.Offset = 0
+ }
+ if cmplxdest {
+ q = obj.Appendp(q, newprog)
+ q.As = pAs
+ q.To = dest
+ q.From.Type = obj.TYPE_REG
+ q.From.Reg = reg
+ }
+ }
+ if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
+ ctxt.Diag("don't know how to handle %v with -dynlink", p)
+ }
+ var source *obj.Addr
+ // MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
+ // MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
+ // An addition may be inserted between the two MOVs if there is an offset.
+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
+ if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
+ ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
+ }
+ source = &p.From
+ } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
+ source = &p.To
+ } else {
+ return
+ }
+ if p.As == obj.ACALL {
+ // When dynlinking on 386, almost any call might end up being a call
+ // to a PLT, so make sure the GOT pointer is loaded into BX.
+ // RegTo2 is set on the replacement call insn to stop it being
+ // processed when it is in turn passed to progedit.
+ //
+ // We disable open-coded defers in buildssa() on 386 ONLY with shared
+ // libraries because of this extra code added before deferreturn calls.
+ if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
+ return
+ }
+ p1 := obj.Appendp(p, newprog)
+ p2 := obj.Appendp(p1, newprog)
+
+ p1.As = ALEAL
+ p1.From.Type = obj.TYPE_MEM
+ p1.From.Name = obj.NAME_STATIC
+ p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = REG_BX
+
+ p2.As = p.As
+ p2.Scond = p.Scond
+ p2.From = p.From
+ if p.RestArgs != nil {
+ p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
+ }
+ p2.Reg = p.Reg
+ p2.To = p.To
+ // p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
+ // in ../pass.go complain, so set it back to TYPE_MEM here, until p2
+ // itself gets passed to progedit.
+ p2.To.Type = obj.TYPE_MEM
+ p2.RegTo2 = 1
+
+ obj.Nopout(p)
+ return
+
+ }
+ if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
+ return
+ }
+ if source.Type != obj.TYPE_MEM {
+ ctxt.Diag("don't know how to handle %v with -dynlink", p)
+ }
+ p1 := obj.Appendp(p, newprog)
+ p2 := obj.Appendp(p1, newprog)
+
+ p1.As = mov
+ p1.From.Type = obj.TYPE_MEM
+ p1.From.Sym = source.Sym
+ p1.From.Name = obj.NAME_GOTREF
+ p1.To.Type = obj.TYPE_REG
+ p1.To.Reg = reg
+
+ p2.As = p.As
+ p2.From = p.From
+ p2.To = p.To
+ if p.From.Name == obj.NAME_EXTERN {
+ p2.From.Reg = reg
+ p2.From.Name = obj.NAME_NONE
+ p2.From.Sym = nil
+ } else if p.To.Name == obj.NAME_EXTERN {
+ p2.To.Reg = reg
+ p2.To.Name = obj.NAME_NONE
+ p2.To.Sym = nil
+ } else {
+ return
+ }
+ obj.Nopout(p)
+}
+
+func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
+ // RegTo2 is set on the instructions we insert here so they don't get
+ // processed twice.
+ if p.RegTo2 != 0 {
+ return
+ }
+ if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
+ return
+ }
+ // Any Prog (aside from the above special cases) with an Addr with Name ==
+ // NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
+ // inserted before it.
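+ //
+ // Roughly, on 386 with -shared,
+ // MOVL sym(SB), BX
+ // becomes
+ // CALL __x86.get_pc_thunk.bx
+ // MOVL sym(BX), BX
+ // where the thunk leaves its return PC in BX and the operand is then
+ // relocated relative to that PC.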
+ isName := func(a *obj.Addr) bool {
+ if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
+ return false
+ }
+ if a.Sym.Type == objabi.STLSBSS {
+ return false
+ }
+ return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
+ }
+
+ if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
+ // Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
+ // to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
+ // respectively.
+ if p.To.Type != obj.TYPE_REG {
+ q := obj.Appendp(p, newprog)
+ q.As = p.As
+ q.From.Type = obj.TYPE_REG
+ q.From.Reg = REG_CX
+ q.To = p.To
+ p.As = AMOVL
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_CX
+ p.To.Sym = nil
+ p.To.Name = obj.NAME_NONE
+ }
+ }
+
+ if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
+ return
+ }
+ var dst int16 = REG_CX
+ if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
+ dst = p.To.Reg
+ // Why? See the comment near the top of rewriteToUseGot above.
+ // AMOVLs might be introduced by the GOT rewrites.
+ }
+ q := obj.Appendp(p, newprog)
+ q.RegTo2 = 1
+ r := obj.Appendp(q, newprog)
+ r.RegTo2 = 1
+ q.As = obj.ACALL
+ thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
+ q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
+ q.To.Type = obj.TYPE_MEM
+ q.To.Name = obj.NAME_EXTERN
+ r.As = p.As
+ r.Scond = p.Scond
+ r.From = p.From
+ r.RestArgs = p.RestArgs
+ r.Reg = p.Reg
+ r.To = p.To
+ if isName(&p.From) {
+ r.From.Reg = dst
+ }
+ if isName(&p.To) {
+ r.To.Reg = dst
+ }
+ if p.GetFrom3() != nil && isName(p.GetFrom3()) {
+ r.GetFrom3().Reg = dst
+ }
+ obj.Nopout(p)
+}
+
+func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
+ if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
+ return
+ }
+
+ p := cursym.Func().Text
+ autoffset := int32(p.To.Offset)
+ if autoffset < 0 {
+ autoffset = 0
+ }
+
+ hasCall := false
+ for q := p; q != nil; q = q.Link {
+ if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
+ hasCall = true
+ break
+ }
+ }
+
+ var bpsize int
+ if ctxt.Arch.Family == sys.AMD64 &&
+ !p.From.Sym.NoFrame() && // (1) below
+ !(autoffset == 0 && p.From.Sym.NoSplit()) && // (2) below
+ !(autoffset == 0 && !hasCall) { // (3) below
+ // Make room to save a base pointer.
+ // There are 2 cases we must avoid:
+ // 1) If noframe is set (which we do for functions that tail call).
+ // 2) Scary runtime internals which would be all messed up by frame pointers.
+ // We detect these using a heuristic: frameless nosplit functions.
+ // TODO: Maybe someday we label them all with NOFRAME and get rid of this heuristic.
+ // For performance, we also want to avoid:
+ // 3) Frameless leaf functions
+ bpsize = ctxt.Arch.PtrSize
+ autoffset += int32(bpsize)
+ p.To.Offset += int64(bpsize)
+ } else {
+ bpsize = 0
+ }
+
+ textarg := int64(p.To.Val.(int32))
+ cursym.Func().Args = int32(textarg)
+ cursym.Func().Locals = int32(p.To.Offset)
+
+ // TODO(rsc): Remove.
+ if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
+ cursym.Func().Locals = 0
+ }
+
+ // TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
+ if ctxt.Arch.Family == sys.AMD64 && autoffset < objabi.StackSmall && !p.From.Sym.NoSplit() {
+ leaf := true
+ LeafSearch:
+ for q := p; q != nil; q = q.Link {
+ switch q.As {
+ case obj.ACALL:
+ // Treat common runtime calls that take no arguments
+ // the same as duffcopy and duffzero.
+ if !isZeroArgRuntimeCall(q.To.Sym) {
+ leaf = false
+ break LeafSearch
+ }
+ fallthrough
+ case obj.ADUFFCOPY, obj.ADUFFZERO:
+ if autoffset >= objabi.StackSmall-8 {
+ leaf = false
+ break LeafSearch
+ }
+ }
+ }
+
+ if leaf {
+ p.From.Sym.Set(obj.AttrNoSplit, true)
+ }
+ }
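+
+ // For example, a function whose frame fits under StackSmall and whose
+ // only call is runtime.panicdivide (which uses no stack before doing
+ // its own check; see isZeroArgRuntimeCall) has now been marked
+ // NOSPLIT, so the split check below is skipped.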
+
+ if !p.From.Sym.NoSplit() || p.From.Sym.Wrapper() {
+ p = obj.Appendp(p, newprog)
+ p = load_g_cx(ctxt, p, newprog) // load g into CX
+ }
+
+ if !cursym.Func().Text.From.Sym.NoSplit() {
+ p = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg)) // emit split check
+ }
+
+ // Delve debugger would like the next instruction to be noted as the end of the function prologue.
+ // TODO: are there other cases (e.g., wrapper functions) that need marking?
+ markedPrologue := false
+
+ if autoffset != 0 {
+ if autoffset%int32(ctxt.Arch.RegSize) != 0 {
+ ctxt.Diag("unaligned stack size %d", autoffset)
+ }
+ p = obj.Appendp(p, newprog)
+ p.As = AADJSP
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = int64(autoffset)
+ p.Spadj = autoffset
+ p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
+ markedPrologue = true
+ }
+
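+ // The caller's BP is saved just below the return PC (at
+ // autoffset-bpsize from SP, above the locals), and BP is then pointed
+ // at that slot so the saved BPs chain up the stack for
+ // frame-pointer-based unwinding.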
+ if bpsize > 0 {
+ // Save caller's BP
+ p = obj.Appendp(p, newprog)
+
+ p.As = AMOVQ
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_BP
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = REG_SP
+ p.To.Scale = 1
+ p.To.Offset = int64(autoffset) - int64(bpsize)
+ if !markedPrologue {
+ p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
+ }
+
+ // Move current frame to BP
+ p = obj.Appendp(p, newprog)
+
+ p.As = ALEAQ
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_SP
+ p.From.Scale = 1
+ p.From.Offset = int64(autoffset) - int64(bpsize)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_BP
+ }
+
+ if cursym.Func().Text.From.Sym.Wrapper() {
+ // if g._panic != nil && g._panic.argp == FP {
+ // g._panic.argp = bottom-of-frame
+ // }
+ //
+ // MOVQ g_panic(CX), BX
+ // TESTQ BX, BX
+ // JNE checkargp
+ // end:
+ // NOP
+ // ... rest of function ...
+ // checkargp:
+ // LEAQ (autoffset+8)(SP), DI
+ // CMPQ panic_argp(BX), DI
+ // JNE end
+ // MOVQ SP, panic_argp(BX)
+ // JMP end
+ //
+ // The NOP is needed to give the jumps somewhere to land.
+ // It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
+ //
+ // The layout is chosen to help static branch prediction:
+ // Both conditional jumps are unlikely, so they are arranged to be forward jumps.
+
+ // MOVQ g_panic(CX), BX
+ p = obj.Appendp(p, newprog)
+ p.As = AMOVQ
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_CX
+ p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_BX
+ if ctxt.Arch.Family == sys.I386 {
+ p.As = AMOVL
+ }
+
+ // TESTQ BX, BX
+ p = obj.Appendp(p, newprog)
+ p.As = ATESTQ
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_BX
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_BX
+ if ctxt.Arch.Family == sys.I386 {
+ p.As = ATESTL
+ }
+
+ // JNE checkargp (checkargp to be resolved later)
+ jne := obj.Appendp(p, newprog)
+ jne.As = AJNE
+ jne.To.Type = obj.TYPE_BRANCH
+
+ // end:
+ // NOP
+ end := obj.Appendp(jne, newprog)
+ end.As = obj.ANOP
+
+ // Fast forward to end of function.
+ var last *obj.Prog
+ for last = end; last.Link != nil; last = last.Link {
+ }
+
+ // LEAQ (autoffset+8)(SP), DI
+ p = obj.Appendp(last, newprog)
+ p.As = ALEAQ
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_SP
+ p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_DI
+ if ctxt.Arch.Family == sys.I386 {
+ p.As = ALEAL
+ }
+
+ // Set jne branch target.
+ jne.To.SetTarget(p)
+
+ // CMPQ panic_argp(BX), DI
+ p = obj.Appendp(p, newprog)
+ p.As = ACMPQ
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_BX
+ p.From.Offset = 0 // Panic.argp
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_DI
+ if ctxt.Arch.Family == sys.I386 {
+ p.As = ACMPL
+ }
+
+ // JNE end
+ p = obj.Appendp(p, newprog)
+ p.As = AJNE
+ p.To.Type = obj.TYPE_BRANCH
+ p.To.SetTarget(end)
+
+ // MOVQ SP, panic_argp(BX)
+ p = obj.Appendp(p, newprog)
+ p.As = AMOVQ
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_SP
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = REG_BX
+ p.To.Offset = 0 // Panic.argp
+ if ctxt.Arch.Family == sys.I386 {
+ p.As = AMOVL
+ }
+
+ // JMP end
+ p = obj.Appendp(p, newprog)
+ p.As = obj.AJMP
+ p.To.Type = obj.TYPE_BRANCH
+ p.To.SetTarget(end)
+
+ // Reset p for following code.
+ p = end
+ }
+
+ var deltasp int32
+ for p = cursym.Func().Text; p != nil; p = p.Link {
+ pcsize := ctxt.Arch.RegSize
+ switch p.From.Name {
+ case obj.NAME_AUTO:
+ p.From.Offset += int64(deltasp) - int64(bpsize)
+ case obj.NAME_PARAM:
+ p.From.Offset += int64(deltasp) + int64(pcsize)
+ }
+ if p.GetFrom3() != nil {
+ switch p.GetFrom3().Name {
+ case obj.NAME_AUTO:
+ p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
+ case obj.NAME_PARAM:
+ p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
+ }
+ }
+ switch p.To.Name {
+ case obj.NAME_AUTO:
+ p.To.Offset += int64(deltasp) - int64(bpsize)
+ case obj.NAME_PARAM:
+ p.To.Offset += int64(deltasp) + int64(pcsize)
+ }
+
+ switch p.As {
+ default:
+ continue
+
+ case APUSHL, APUSHFL:
+ deltasp += 4
+ p.Spadj = 4
+ continue
+
+ case APUSHQ, APUSHFQ:
+ deltasp += 8
+ p.Spadj = 8
+ continue
+
+ case APUSHW, APUSHFW:
+ deltasp += 2
+ p.Spadj = 2
+ continue
+
+ case APOPL, APOPFL:
+ deltasp -= 4
+ p.Spadj = -4
+ continue
+
+ case APOPQ, APOPFQ:
+ deltasp -= 8
+ p.Spadj = -8
+ continue
+
+ case APOPW, APOPFW:
+ deltasp -= 2
+ p.Spadj = -2
+ continue
+
+ case AADJSP:
+ p.Spadj = int32(p.From.Offset)
+ deltasp += int32(p.From.Offset)
+ continue
+
+ case obj.ARET:
+ // do nothing
+ }
+
+ if autoffset != deltasp {
+ ctxt.Diag("unbalanced PUSH/POP")
+ }
+
+ if autoffset != 0 {
+ to := p.To // Keep To attached to RET for retjmp below
+ p.To = obj.Addr{}
+ if bpsize > 0 {
+ // Restore caller's BP
+ p.As = AMOVQ
+
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_SP
+ p.From.Scale = 1
+ p.From.Offset = int64(autoffset) - int64(bpsize)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_BP
+ p = obj.Appendp(p, newprog)
+ }
+
+ p.As = AADJSP
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = int64(-autoffset)
+ p.Spadj = -autoffset
+ p = obj.Appendp(p, newprog)
+ p.As = obj.ARET
+ p.To = to
+
+ // If there are instructions following
+ // this ARET, they come from a branch
+ // with the same stackframe, so undo
+ // the cleanup.
+ p.Spadj = +autoffset
+ }
+
+ if p.To.Sym != nil { // retjmp
+ p.As = obj.AJMP
+ }
+ }
+}
+
+func isZeroArgRuntimeCall(s *obj.LSym) bool {
+ if s == nil {
+ return false
+ }
+ switch s.Name {
+ case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
+ return true
+ }
+ if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
+ // These functions do take arguments (in registers),
+ // but use no stack before they do a stack check. We
+ // should include them. See issue 31219.
+ return true
+ }
+ return false
+}
+
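+// indir_cx rewrites a to be an indirect reference through CX, which
+// the code here uses as the register holding g.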
+func indir_cx(ctxt *obj.Link, a *obj.Addr) {
+ a.Type = obj.TYPE_MEM
+ a.Reg = REG_CX
+}
+
+// Append code to p to load g into cx.
+// Overwrites p with the first instruction (no first appendp).
+ // Overwriting p is unusual but it lets us use this in both the
+// prologue (caller must call appendp first) and in the epilogue.
+// Returns last new instruction.
+func load_g_cx(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) *obj.Prog {
+ p.As = AMOVQ
+ if ctxt.Arch.PtrSize == 4 {
+ p.As = AMOVL
+ }
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_TLS
+ p.From.Offset = 0
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_CX
+
+ next := p.Link
+ progedit(ctxt, p, newprog)
+ for p.Link != next {
+ p = p.Link
+ progedit(ctxt, p, newprog)
+ }
+
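+ // If progedit rewrote the load into the 2-instruction TLS form,
+ // ensure the indexed operand carries the Scale == 2 marker that the
+ // assembler tables currently accept (see the TODO in progedit).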
+ if p.From.Index == REG_TLS {
+ p.From.Scale = 2
+ }
+
+ return p
+}
+
+// Append code to p to check for stack split.
+// Appends to (does not overwrite) p.
+// Assumes g is in CX.
+// Returns last new instruction.
+func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) *obj.Prog {
+ cmp := ACMPQ
+ lea := ALEAQ
+ mov := AMOVQ
+ sub := ASUBQ
+
+ if ctxt.Arch.Family == sys.I386 {
+ cmp = ACMPL
+ lea = ALEAL
+ mov = AMOVL
+ sub = ASUBL
+ }
+
+ var q1 *obj.Prog
+ if framesize <= objabi.StackSmall {
+ // small stack: SP <= stackguard
+ // CMPQ SP, stackguard
+ p = obj.Appendp(p, newprog)
+
+ p.As = cmp
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_SP
+ indir_cx(ctxt, &p.To)
+ p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
+ if cursym.CFunc() {
+ p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
+ }
+
+ // Mark the stack bound check and morestack call async nonpreemptible.
+ // If we get preempted here, when resumed the preemption request is
+ // cleared, but we'll still call morestack, which will double the stack
+ // unnecessarily. See issue #35470.
+ p = ctxt.StartUnsafePoint(p, newprog)
+ } else if framesize <= objabi.StackBig {
+ // large stack: SP-framesize <= stackguard-StackSmall
+ // LEAQ -xxx(SP), AX
+ // CMPQ AX, stackguard
+ p = obj.Appendp(p, newprog)
+
+ p.As = lea
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_SP
+ p.From.Offset = -(int64(framesize) - objabi.StackSmall)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_AX
+
+ p = obj.Appendp(p, newprog)
+ p.As = cmp
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_AX
+ indir_cx(ctxt, &p.To)
+ p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
+ if cursym.CFunc() {
+ p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
+ }
+
+ p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
+ } else {
+ // The stack is so large that we need to protect against wraparound.
+ // If SP is close to zero:
+ // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
+ // The +StackGuard on both sides is required to keep the left side positive:
+ // SP is allowed to be slightly below stackguard. See stack.h.
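+ // (This is the large-stack check SP-framesize+StackSmall <= stackguard
+ // rearranged to SP-stackguard <= framesize-StackSmall, with StackGuard
+ // then added to both sides.)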
+ //
+ // Preemption sets stackguard to StackPreempt, a very large value.
+ // That breaks the math above, so we have to check for that explicitly.
+ // MOVQ stackguard, SI
+ // CMPQ SI, $StackPreempt
+ // JEQ label-of-call-to-morestack
+ // LEAQ StackGuard(SP), AX
+ // SUBQ SI, AX
+ // CMPQ AX, $(framesize+(StackGuard-StackSmall))
+
+ p = obj.Appendp(p, newprog)
+
+ p.As = mov
+ indir_cx(ctxt, &p.From)
+ p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
+ if cursym.CFunc() {
+ p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
+ }
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_SI
+
+ p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
+
+ p = obj.Appendp(p, newprog)
+ p.As = cmp
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_SI
+ p.To.Type = obj.TYPE_CONST
+ p.To.Offset = objabi.StackPreempt
+ if ctxt.Arch.Family == sys.I386 {
+ p.To.Offset = int64(uint32(objabi.StackPreempt & (1<<32 - 1)))
+ }
+
+ p = obj.Appendp(p, newprog)
+ p.As = AJEQ
+ p.To.Type = obj.TYPE_BRANCH
+ q1 = p
+
+ p = obj.Appendp(p, newprog)
+ p.As = lea
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = REG_SP
+ p.From.Offset = int64(objabi.StackGuard)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_AX
+
+ p = obj.Appendp(p, newprog)
+ p.As = sub
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_SI
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REG_AX
+
+ p = obj.Appendp(p, newprog)
+ p.As = cmp
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = REG_AX
+ p.To.Type = obj.TYPE_CONST
+ p.To.Offset = int64(framesize) + (int64(objabi.StackGuard) - objabi.StackSmall)
+ }
+
+ // common
+ jls := obj.Appendp(p, newprog)
+ jls.As = AJLS
+ jls.To.Type = obj.TYPE_BRANCH
+
+ end := ctxt.EndUnsafePoint(jls, newprog, -1)
+
+ var last *obj.Prog
+ for last = cursym.Func().Text; last.Link != nil; last = last.Link {
+ }
+
+ // Now we are at the end of the function, but logically
+ // we are still in function prologue. We need to fix the
+ // SP data and PCDATA.
+ spfix := obj.Appendp(last, newprog)
+ spfix.As = obj.ANOP
+ spfix.Spadj = -framesize
+
+ pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
+ pcdata = ctxt.StartUnsafePoint(pcdata, newprog)
+
+ call := obj.Appendp(pcdata, newprog)
+ call.Pos = cursym.Func().Text.Pos
+ call.As = obj.ACALL
+ call.To.Type = obj.TYPE_BRANCH
+ call.To.Name = obj.NAME_EXTERN
+ morestack := "runtime.morestack"
+ switch {
+ case cursym.CFunc():
+ morestack = "runtime.morestackc"
+ case !cursym.Func().Text.From.Sym.NeedCtxt():
+ morestack = "runtime.morestack_noctxt"
+ }
+ call.To.Sym = ctxt.Lookup(morestack)
+ // When compiling 386 code for dynamic linking, the call needs to be adjusted
+ // to follow PIC rules. This in turn can insert more instructions, so we need
+ // to keep track of the start of the call (where the jump will be to) and the
+ // end (which following instructions are appended to).
+ callend := call
+ progedit(ctxt, callend, newprog)
+ for ; callend.Link != nil; callend = callend.Link {
+ progedit(ctxt, callend.Link, newprog)
+ }
+
+ pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
+
+ jmp := obj.Appendp(pcdata, newprog)
+ jmp.As = obj.AJMP
+ jmp.To.Type = obj.TYPE_BRANCH
+ jmp.To.SetTarget(cursym.Func().Text.Link)
+ jmp.Spadj = +framesize
+
+ jls.To.SetTarget(call)
+ if q1 != nil {
+ q1.To.SetTarget(call)
+ }
+
+ return end
+}
+
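+// unaryDst records the instructions that take a single operand which
+// is a destination (written rather than read); see obj.LinkArch.UnaryDst.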
+var unaryDst = map[obj.As]bool{
+ ABSWAPL: true,
+ ABSWAPQ: true,
+ ACLDEMOTE: true,
+ ACLFLUSH: true,
+ ACLFLUSHOPT: true,
+ ACLWB: true,
+ ACMPXCHG16B: true,
+ ACMPXCHG8B: true,
+ ADECB: true,
+ ADECL: true,
+ ADECQ: true,
+ ADECW: true,
+ AFBSTP: true,
+ AFFREE: true,
+ AFLDENV: true,
+ AFSAVE: true,
+ AFSTCW: true,
+ AFSTENV: true,
+ AFSTSW: true,
+ AFXSAVE64: true,
+ AFXSAVE: true,
+ AINCB: true,
+ AINCL: true,
+ AINCQ: true,
+ AINCW: true,
+ ANEGB: true,
+ ANEGL: true,
+ ANEGQ: true,
+ ANEGW: true,
+ ANOTB: true,
+ ANOTL: true,
+ ANOTQ: true,
+ ANOTW: true,
+ APOPL: true,
+ APOPQ: true,
+ APOPW: true,
+ ARDFSBASEL: true,
+ ARDFSBASEQ: true,
+ ARDGSBASEL: true,
+ ARDGSBASEQ: true,
+ ARDRANDL: true,
+ ARDRANDQ: true,
+ ARDRANDW: true,
+ ARDSEEDL: true,
+ ARDSEEDQ: true,
+ ARDSEEDW: true,
+ ASETCC: true,
+ ASETCS: true,
+ ASETEQ: true,
+ ASETGE: true,
+ ASETGT: true,
+ ASETHI: true,
+ ASETLE: true,
+ ASETLS: true,
+ ASETLT: true,
+ ASETMI: true,
+ ASETNE: true,
+ ASETOC: true,
+ ASETOS: true,
+ ASETPC: true,
+ ASETPL: true,
+ ASETPS: true,
+ ASGDT: true,
+ ASIDT: true,
+ ASLDTL: true,
+ ASLDTQ: true,
+ ASLDTW: true,
+ ASMSWL: true,
+ ASMSWQ: true,
+ ASMSWW: true,
+ ASTMXCSR: true,
+ ASTRL: true,
+ ASTRQ: true,
+ ASTRW: true,
+ AXSAVE64: true,
+ AXSAVE: true,
+ AXSAVEC64: true,
+ AXSAVEC: true,
+ AXSAVEOPT64: true,
+ AXSAVEOPT: true,
+ AXSAVES64: true,
+ AXSAVES: true,
+}
+
+var Linkamd64 = obj.LinkArch{
+ Arch: sys.ArchAMD64,
+ Init: instinit,
+ Preprocess: preprocess,
+ Assemble: span6,
+ Progedit: progedit,
+ UnaryDst: unaryDst,
+ DWARFRegisters: AMD64DWARFRegisters,
+}
+
+var Link386 = obj.LinkArch{
+ Arch: sys.Arch386,
+ Init: instinit,
+ Preprocess: preprocess,
+ Assemble: span6,
+ Progedit: progedit,
+ UnaryDst: unaryDst,
+ DWARFRegisters: X86DWARFRegisters,
+}