8 files changed, 4470 insertions, 0 deletions
diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go
new file mode 100644
index 0000000..d2c4197
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/anames.go
@@ -0,0 +1,252 @@
+// Code generated by stringer -i cpu.go -o anames.go -p riscv; DO NOT EDIT.
+
+package riscv
+
+import "cmd/internal/obj"
+
+var Anames = []string{ // Mnemonics in cpu.go opcode-constant order; the first entry sits at index obj.A_ARCHSPECIFIC.
+	obj.A_ARCHSPECIFIC: "ADDI",
+	"SLTI",
+	"SLTIU",
+	"ANDI",
+	"ORI",
+	"XORI",
+	"SLLI",
+	"SRLI",
+	"SRAI",
+	"LUI",
+	"AUIPC",
+	"ADD",
+	"SLT",
+	"SLTU",
+	"AND",
+	"OR",
+	"XOR",
+	"SLL",
+	"SRL",
+	"SUB",
+	"SRA",
+	"JAL",
+	"JALR",
+	"BEQ",
+	"BNE",
+	"BLT",
+	"BLTU",
+	"BGE",
+	"BGEU",
+	"LW",
+	"LWU",
+	"LH",
+	"LHU",
+	"LB",
+	"LBU",
+	"SW",
+	"SH",
+	"SB",
+	"FENCE",
+	"FENCETSO",
+	"PAUSE",
+	"ADDIW",
+	"SLLIW",
+	"SRLIW",
+	"SRAIW",
+	"ADDW",
+	"SLLW",
+	"SRLW",
+	"SUBW",
+	"SRAW",
+	"LD",
+	"SD",
+	"MUL",
+	"MULH",
+	"MULHU",
+	"MULHSU",
+	"MULW",
+	"DIV",
+	"DIVU",
+	"REM",
+	"REMU",
+	"DIVW",
+	"DIVUW",
+	"REMW",
+	"REMUW",
+	"LRD",
+	"SCD",
+	"LRW",
+	"SCW",
+	"AMOSWAPD",
+	"AMOADDD",
+	"AMOANDD",
+	"AMOORD",
+	"AMOXORD",
+	"AMOMAXD",
+	"AMOMAXUD",
+	"AMOMIND",
+	"AMOMINUD",
+	"AMOSWAPW",
+	"AMOADDW",
+	"AMOANDW",
+	"AMOORW",
+	"AMOXORW",
+	"AMOMAXW",
+	"AMOMAXUW",
+	"AMOMINW",
+	"AMOMINUW",
+	"RDCYCLE",
+	"RDCYCLEH",
+	"RDTIME",
+	"RDTIMEH",
+	"RDINSTRET",
+	"RDINSTRETH",
+	"FRCSR",
+	"FSCSR",
+	"FRRM",
+	"FSRM",
+	"FRFLAGS",
+	"FSFLAGS",
+	"FSRMI",
+	"FSFLAGSI",
+	"FLW",
+	"FSW",
+	"FADDS",
+	"FSUBS",
+	"FMULS",
+	"FDIVS",
+	"FMINS",
+	"FMAXS",
+	"FSQRTS",
+	"FMADDS",
+	"FMSUBS",
+	"FNMADDS",
+	"FNMSUBS",
+	"FCVTWS",
+	"FCVTLS",
+	"FCVTSW",
+	"FCVTSL",
+	"FCVTWUS",
+	"FCVTLUS",
+	"FCVTSWU",
+	"FCVTSLU",
+	"FSGNJS",
+	"FSGNJNS",
+	"FSGNJXS",
+	"FMVXS",
+	"FMVSX",
+	"FMVXW",
+	"FMVWX",
+	"FEQS",
+	"FLTS",
+	"FLES",
+	"FCLASSS",
+	"FLD",
+	"FSD",
+	"FADDD",
+	"FSUBD",
+	"FMULD",
+	"FDIVD",
+	"FMIND",
+	"FMAXD",
+	"FSQRTD",
+	"FMADDD",
+	"FMSUBD",
+	"FNMADDD",
+	"FNMSUBD",
+	"FCVTWD",
+	"FCVTLD",
+	"FCVTDW",
+	"FCVTDL",
+	"FCVTWUD",
+	"FCVTLUD",
+	"FCVTDWU",
+	"FCVTDLU",
+	"FCVTSD",
+	"FCVTDS",
+	"FSGNJD",
+	"FSGNJND",
+	"FSGNJXD",
+	"FMVXD",
+	"FMVDX",
+	"FEQD",
+	"FLTD",
+	"FLED",
+	"FCLASSD",
+	"FLQ",
+	"FSQ",
+	"FADDQ",
+	"FSUBQ",
+	"FMULQ",
+	"FDIVQ",
+	"FMINQ",
+	"FMAXQ",
+	"FSQRTQ",
+	"FMADDQ",
+	"FMSUBQ",
+	"FNMADDQ",
+	"FNMSUBQ",
+	"FCVTWQ",
+	"FCVTLQ",
+	"FCVTSQ",
+	"FCVTDQ",
+	"FCVTQW",
+	"FCVTQL",
+	"FCVTQS",
+	"FCVTQD",
+	"FCVTWUQ",
+	"FCVTLUQ",
+	"FCVTQWU",
+	"FCVTQLU",
+	"FSGNJQ",
+	"FSGNJNQ",
+	"FSGNJXQ",
+	"FEQQ",
+	"FLEQ",
+	"FLTQ",
+	"FCLASSQ",
+	"CSRRW",
+	"CSRRS",
+	"CSRRC",
+	"CSRRWI",
+	"CSRRSI",
+	"CSRRCI",
+	"ECALL",
+	"SCALL",
+	"EBREAK",
+	"SBREAK",
+	"MRET",
+	"SRET",
+	"DRET",
+	"WFI",
+	"SFENCEVMA",
+	"WORD",
+	"BEQZ",
+	"BGEZ",
+	"BGT",
+	"BGTU",
+	"BGTZ",
+	"BLE",
+	"BLEU",
+	"BLEZ",
+	"BLTZ",
+	"BNEZ",
+	"FABSD",
+	"FABSS",
+	"FNEGD",
+	"FNEGS",
+	"FNED",
+	"FNES",
+	"MOV",
+	"MOVB",
+	"MOVBU",
+	"MOVF",
+	"MOVD",
+	"MOVH",
+	"MOVHU",
+	"MOVW",
+	"MOVWU",
+	"NEG",
+	"NEGW",
+	"NOT",
+	"SEQZ",
+	"SNEZ",
+	"LAST",
+}
diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go
new file mode 100644
index 0000000..96ea230
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/asm_test.go
@@ -0,0 +1,311 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package riscv
+
+import (
+	"bytes"
+	"fmt"
+	"internal/testenv"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+// TestLargeBranch generates a large function with a very far conditional
+// branch, in order to ensure that it assembles successfully.
+//
+// The source is cross-assembled for linux/riscv64, so any host can run it.
+func TestLargeBranch(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping test in short mode")
+	}
+	testenv.MustHaveGoBuild(t)
+
+	// t.TempDir removes the directory when the test completes and fails
+	// the test itself if the directory cannot be created.
+	dir := t.TempDir()
+
+	// Generate a very large function.
+	buf := bytes.NewBuffer(make([]byte, 0, 7000000))
+	genLargeBranch(buf)
+
+	tmpfile := filepath.Join(dir, "x.s")
+	if err := os.WriteFile(tmpfile, buf.Bytes(), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v", err)
+	}
+
+	// Assemble generated file.
+	cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux")
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("Build failed: %v, output: %s", err, out)
+	}
+}
+
+func genLargeBranch(buf *bytes.Buffer) {
+	fmt.Fprintln(buf, "TEXT f(SB),0,$0-0")
+	fmt.Fprintln(buf, "BEQ X0, X0, label")
+	for i := 0; i < 1<<19; i++ {
+		fmt.Fprintln(buf, "ADD $0, X0, X0")
+	}
+	fmt.Fprintln(buf, "label:")
+	fmt.Fprintln(buf, "ADD $0, X0, X0")
+}
+
+// TestLargeCall generates a large function (>1MB of text) with a call to
+// a following function, in order to ensure that it assembles and links
+// correctly.
+//
+// The program is always linked internally for linux/riscv64; external
+// linking is additionally exercised on riscv64 hosts that have cgo.
+func TestLargeCall(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping test in short mode")
+	}
+	testenv.MustHaveGoBuild(t)
+
+	// t.TempDir removes the directory when the test completes and fails
+	// the test itself if the directory cannot be created.
+	dir := t.TempDir()
+
+	if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module largecall"), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v\n", err)
+	}
+	main := `package main
+func main() {
+        x()
+}
+
+func x()
+func y()
+`
+	if err := os.WriteFile(filepath.Join(dir, "x.go"), []byte(main), 0644); err != nil {
+		t.Fatalf("failed to write main: %v\n", err)
+	}
+
+	// Generate a very large function with call.
+	buf := bytes.NewBuffer(make([]byte, 0, 7000000))
+	genLargeCall(buf)
+
+	if err := os.WriteFile(filepath.Join(dir, "x.s"), buf.Bytes(), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v\n", err)
+	}
+
+	// build compiles and links the generated module with the given linker
+	// flags, reporting (but not aborting on) failure.
+	build := func(ldflags string) {
+		cmd := testenv.Command(t, testenv.GoToolPath(t), "build", ldflags)
+		cmd.Dir = dir
+		cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux")
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Errorf("Build failed: %v, output: %s", err, out)
+		}
+	}
+
+	// Build generated files.
+	build("-ldflags=-linkmode=internal")
+	// External linking is only attempted on riscv64 hosts with cgo.
+	if runtime.GOARCH == "riscv64" && testenv.HasCGO() {
+		build("-ldflags=-linkmode=external")
+	}
+}
+
+func genLargeCall(buf *bytes.Buffer) {
+	fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0")
+	fmt.Fprintln(buf, "CALL ·y(SB)")
+	for i := 0; i < 1<<19; i++ {
+		fmt.Fprintln(buf, "ADD $0, X0, X0")
+	}
+	fmt.Fprintln(buf, "RET")
+	fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0")
+	fmt.Fprintln(buf, "ADD $0, X0, X0")
+	fmt.Fprintln(buf, "RET")
+}
+
+// TestLargeJump builds and runs a function containing a JMP over >1MB of
+// text to its end, checking that the jump assembles and executes correctly.
+func TestLargeJump(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping test in short mode")
+	}
+	if runtime.GOARCH != "riscv64" {
+		t.Skip("Require riscv64 to run")
+	}
+	testenv.MustHaveGoBuild(t)
+	dir := t.TempDir()
+
+	if err := os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module largejump"), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v\n", err)
+	}
+	main := `package main
+
+import "fmt"
+
+func main() {
+        fmt.Print(x())
+}
+
+func x() uint64
+`
+	if err := os.WriteFile(filepath.Join(dir, "x.go"), []byte(main), 0644); err != nil {
+		t.Fatalf("failed to write main: %v\n", err)
+	}
+
+	// Generate a very large jump instruction.
+	buf := bytes.NewBuffer(make([]byte, 0, 7000000))
+	genLargeJump(buf)
+
+	if err := os.WriteFile(filepath.Join(dir, "x.s"), buf.Bytes(), 0644); err != nil {
+		t.Fatalf("Failed to write file: %v\n", err)
+	}
+
+	// Build generated files; without a binary there is nothing to run.
+	cmd := testenv.Command(t, testenv.GoToolPath(t), "build", "-o", "x.exe")
+	cmd.Dir = dir
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Fatalf("Build failed: %v, output: %s", err, out)
+	}
+
+	out, err := testenv.Command(t, filepath.Join(dir, "x.exe")).CombinedOutput()
+	if err != nil {
+		t.Errorf("Run failed: %v, output: %s", err, out)
+	}
+	if string(out) != "1" {
+		t.Errorf(`Got test output %q, want "1"`, string(out))
+	}
+}
+
+func genLargeJump(buf *bytes.Buffer) {
+	fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-8")
+	fmt.Fprintln(buf, "MOV  X0, X10")
+	fmt.Fprintln(buf, "JMP end")
+	for i := 0; i < 1<<18; i++ {
+		fmt.Fprintln(buf, "ADD $1, X10, X10")
+	}
+	fmt.Fprintln(buf, "end:")
+	fmt.Fprintln(buf, "ADD $1, X10, X10")
+	fmt.Fprintln(buf, "MOV X10, r+0(FP)")
+	fmt.Fprintln(buf, "RET")
+}
+
+// TestNoRet checks that a function body that does not end in RET (here a
+// lone NOP) still assembles for linux/riscv64.
+//
+// Issue 20348.
+func TestNoRet(t *testing.T) {
+	// t.TempDir cleans up after the test and reports creation failures.
+	dir := t.TempDir()
+	tmpfile := filepath.Join(dir, "x.s")
+	if err := os.WriteFile(tmpfile, []byte("TEXT ·stub(SB),$0-0\nNOP\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux")
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Errorf("%v\n%s", err, out)
+	}
+}
+
+// TestImmediateSplitting checks that loads, stores and their MOV pseudo
+// forms assemble for linux/riscv64 when the memory offset (4096) is too
+// large for a signed 12-bit immediate. Only successful assembly is checked;
+// the generated encoding is not inspected.
+func TestImmediateSplitting(t *testing.T) {
+	dir := t.TempDir() // Removed automatically when the test completes.
+	tmpfile := filepath.Join(dir, "x.s")
+	asm := `
+TEXT _stub(SB),$0-0
+	LB	4096(X5), X6
+	LH	4096(X5), X6
+	LW	4096(X5), X6
+	LD	4096(X5), X6
+	LBU	4096(X5), X6
+	LHU	4096(X5), X6
+	LWU	4096(X5), X6
+	SB	X6, 4096(X5)
+	SH	X6, 4096(X5)
+	SW	X6, 4096(X5)
+	SD	X6, 4096(X5)
+
+	FLW	4096(X5), F6
+	FLD	4096(X5), F6
+	FSW	F6, 4096(X5)
+	FSD	F6, 4096(X5)
+
+	MOVB	4096(X5), X6
+	MOVH	4096(X5), X6
+	MOVW	4096(X5), X6
+	MOV	4096(X5), X6
+	MOVBU	4096(X5), X6
+	MOVHU	4096(X5), X6
+	MOVWU	4096(X5), X6
+
+	MOVB	X6, 4096(X5)
+	MOVH	X6, 4096(X5)
+	MOVW	X6, 4096(X5)
+	MOV	X6, 4096(X5)
+
+	MOVF	4096(X5), F6
+	MOVD	4096(X5), F6
+	MOVF	F6, 4096(X5)
+	MOVD	F6, 4096(X5)
+`
+	if err := os.WriteFile(tmpfile, []byte(asm), 0644); err != nil {
+		t.Fatal(err)
+	}
+	cmd := testenv.Command(t, testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), tmpfile)
+	cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux")
+	if out, err := cmd.CombinedOutput(); err != nil {
+		t.Errorf("%v\n%s", err, out)
+	}
+}
+
+// TestBranch runs "go test" in testdata/testbranch on riscv64 hosts.
+func TestBranch(t *testing.T) {
+	if runtime.GOARCH != "riscv64" {
+		t.Skip("Requires riscv64 to run")
+	}
+	testenv.MustHaveGoBuild(t)
+
+	goTest := testenv.Command(t, testenv.GoToolPath(t), "test")
+	goTest.Dir = "testdata/testbranch"
+	if output, runErr := testenv.CleanCmdEnv(goTest).CombinedOutput(); runErr != nil {
+		t.Errorf("Branch test failed: %v\n%s", runErr, output)
+	}
+}
+
+func TestPCAlign(t *testing.T) {
+	dir := t.TempDir()
+	tmpfile := filepath.Join(dir, "x.s")
+	asm := `
+TEXT _stub(SB),$0-0
+	FENCE
+	PCALIGN	$8
+	FENCE
+	RET
+`
+	if err := os.WriteFile(tmpfile, []byte(asm), 0644); err != nil {
+		t.Fatal(err)
+	}
+	cmd := exec.Command(testenv.GoToolPath(t), "tool", "asm", "-o", filepath.Join(dir, "x.o"), "-S", tmpfile)
+	cmd.Env = append(os.Environ(), "GOARCH=riscv64", "GOOS=linux")
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("Failed to assemble: %v\n%s", err, out)
+	}
+	// The expected instruction sequence after alignment:
+	//	FENCE
+	//	NOP
+	//	FENCE
+	//	RET
+	want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00"
+	if !strings.Contains(string(out), want) {
+		t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want)
+	}
+}
diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go
new file mode 100644
index 0000000..edd1ac8
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/cpu.go
@@ -0,0 +1,654 @@
+//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
+//	Portions Copyright © 1997-1999 Vita Nuova Limited
+//	Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com)
+//	Portions Copyright © 2004,2006 Bruce Ellis
+//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
+//	Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others
+//	Portions Copyright © 2009 The Go Authors.  All rights reserved.
+//	Portions Copyright © 2019 The Go Authors.  All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package riscv
+
+import "cmd/internal/obj"
+
+//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p riscv
+
+const (
+	// Base (integer) register numberings, X0 through X31.
+	REG_X0 = obj.RBaseRISCV + iota
+	REG_X1
+	REG_X2
+	REG_X3
+	REG_X4
+	REG_X5
+	REG_X6
+	REG_X7
+	REG_X8
+	REG_X9
+	REG_X10
+	REG_X11
+	REG_X12
+	REG_X13
+	REG_X14
+	REG_X15
+	REG_X16
+	REG_X17
+	REG_X18
+	REG_X19
+	REG_X20
+	REG_X21
+	REG_X22
+	REG_X23
+	REG_X24
+	REG_X25
+	REG_X26
+	REG_X27
+	REG_X28
+	REG_X29
+	REG_X30
+	REG_X31
+
+	// Floating-point register numberings, F0 through F31, numbered directly after X31.
+	REG_F0
+	REG_F1
+	REG_F2
+	REG_F3
+	REG_F4
+	REG_F5
+	REG_F6
+	REG_F7
+	REG_F8
+	REG_F9
+	REG_F10
+	REG_F11
+	REG_F12
+	REG_F13
+	REG_F14
+	REG_F15
+	REG_F16
+	REG_F17
+	REG_F18
+	REG_F19
+	REG_F20
+	REG_F21
+	REG_F22
+	REG_F23
+	REG_F24
+	REG_F25
+	REG_F26
+	REG_F27
+	REG_F28
+	REG_F29
+	REG_F30
+	REG_F31
+
+	// REG_END marks the end of the register numbering (one past REG_F31).
+	REG_END
+
+	// ABI names for the general (integer) registers, as aliases of REG_X0..REG_X31.
+	REG_ZERO = REG_X0
+	REG_RA   = REG_X1 // aka REG_LR
+	REG_SP   = REG_X2
+	REG_GP   = REG_X3 // aka REG_SB
+	REG_TP   = REG_X4
+	REG_T0   = REG_X5
+	REG_T1   = REG_X6
+	REG_T2   = REG_X7
+	REG_S0   = REG_X8
+	REG_S1   = REG_X9
+	REG_A0   = REG_X10
+	REG_A1   = REG_X11
+	REG_A2   = REG_X12
+	REG_A3   = REG_X13
+	REG_A4   = REG_X14
+	REG_A5   = REG_X15
+	REG_A6   = REG_X16
+	REG_A7   = REG_X17
+	REG_S2   = REG_X18
+	REG_S3   = REG_X19
+	REG_S4   = REG_X20
+	REG_S5   = REG_X21
+	REG_S6   = REG_X22
+	REG_S7   = REG_X23
+	REG_S8   = REG_X24
+	REG_S9   = REG_X25
+	REG_S10  = REG_X26 // aka REG_CTXT
+	REG_S11  = REG_X27 // aka REG_G
+	REG_T3   = REG_X28
+	REG_T4   = REG_X29
+	REG_T5   = REG_X30
+	REG_T6   = REG_X31 // aka REG_TMP
+
+	// Go runtime register names, as aliases of the ABI names above.
+	REG_CTXT = REG_S10 // Context for closures.
+	REG_G    = REG_S11 // G pointer.
+	REG_LR   = REG_RA  // Link register.
+	REG_TMP  = REG_T6  // Reserved for assembler use.
+
+	// ABI names for floating point registers, as aliases of REG_F0..REG_F31.
+	REG_FT0  = REG_F0
+	REG_FT1  = REG_F1
+	REG_FT2  = REG_F2
+	REG_FT3  = REG_F3
+	REG_FT4  = REG_F4
+	REG_FT5  = REG_F5
+	REG_FT6  = REG_F6
+	REG_FT7  = REG_F7
+	REG_FS0  = REG_F8
+	REG_FS1  = REG_F9
+	REG_FA0  = REG_F10
+	REG_FA1  = REG_F11
+	REG_FA2  = REG_F12
+	REG_FA3  = REG_F13
+	REG_FA4  = REG_F14
+	REG_FA5  = REG_F15
+	REG_FA6  = REG_F16
+	REG_FA7  = REG_F17
+	REG_FS2  = REG_F18
+	REG_FS3  = REG_F19
+	REG_FS4  = REG_F20
+	REG_FS5  = REG_F21
+	REG_FS6  = REG_F22
+	REG_FS7  = REG_F23
+	REG_FS8  = REG_F24
+	REG_FS9  = REG_F25
+	REG_FS10 = REG_F26
+	REG_FS11 = REG_F27
+	REG_FT8  = REG_F28
+	REG_FT9  = REG_F29
+	REG_FT10 = REG_F30
+	REG_FT11 = REG_F31
+
+	// Names generated by the SSA compiler.
+	REGSP = REG_SP
+	REGG  = REG_G
+)
+
+// RISCV64DWARFRegisters maps REG_* numbers to DWARF register numbers; see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-dwarf.adoc#dwarf-register-numbers
+var RISCV64DWARFRegisters = map[int16]int16{
+	// Integer Registers.
+	REG_X0:  0,
+	REG_X1:  1,
+	REG_X2:  2,
+	REG_X3:  3,
+	REG_X4:  4,
+	REG_X5:  5,
+	REG_X6:  6,
+	REG_X7:  7,
+	REG_X8:  8,
+	REG_X9:  9,
+	REG_X10: 10,
+	REG_X11: 11,
+	REG_X12: 12,
+	REG_X13: 13,
+	REG_X14: 14,
+	REG_X15: 15,
+	REG_X16: 16,
+	REG_X17: 17,
+	REG_X18: 18,
+	REG_X19: 19,
+	REG_X20: 20,
+	REG_X21: 21,
+	REG_X22: 22,
+	REG_X23: 23,
+	REG_X24: 24,
+	REG_X25: 25,
+	REG_X26: 26,
+	REG_X27: 27,
+	REG_X28: 28,
+	REG_X29: 29,
+	REG_X30: 30,
+	REG_X31: 31,
+
+	// Floating-Point Registers.
+	REG_F0:  32,
+	REG_F1:  33,
+	REG_F2:  34,
+	REG_F3:  35,
+	REG_F4:  36,
+	REG_F5:  37,
+	REG_F6:  38,
+	REG_F7:  39,
+	REG_F8:  40,
+	REG_F9:  41,
+	REG_F10: 42,
+	REG_F11: 43,
+	REG_F12: 44,
+	REG_F13: 45,
+	REG_F14: 46,
+	REG_F15: 47,
+	REG_F16: 48,
+	REG_F17: 49,
+	REG_F18: 50,
+	REG_F19: 51,
+	REG_F20: 52,
+	REG_F21: 53,
+	REG_F22: 54,
+	REG_F23: 55,
+	REG_F24: 56,
+	REG_F25: 57,
+	REG_F26: 58,
+	REG_F27: 59,
+	REG_F28: 60,
+	REG_F29: 61,
+	REG_F30: 62,
+	REG_F31: 63,
+}
+
+// Prog.Mark flags. Each flag is a distinct bit (1 << iota), so flags can be combined.
+const (
+	// USES_REG_TMP indicates that a machine instruction generated from the
+	// corresponding *obj.Prog uses the temporary register.
+	USES_REG_TMP = 1 << iota
+
+	// NEED_JAL_RELOC is set on JAL instructions to indicate that a
+	// R_RISCV_JAL relocation is needed.
+	NEED_JAL_RELOC
+
+	// NEED_CALL_RELOC is set on an AUIPC instruction to indicate that it
+	// is the first instruction in an AUIPC + JAL pair that needs a
+	// R_RISCV_CALL relocation. NOTE(review): the pair is presumably AUIPC + JALR — confirm.
+	NEED_CALL_RELOC
+
+	// NEED_PCREL_ITYPE_RELOC is set on AUIPC instructions to indicate that
+	// it is the first instruction in an AUIPC + I-type pair that needs a
+	// R_RISCV_PCREL_ITYPE relocation.
+	NEED_PCREL_ITYPE_RELOC
+
+	// NEED_PCREL_STYPE_RELOC is set on AUIPC instructions to indicate that
+	// it is the first instruction in an AUIPC + S-type pair that needs a
+	// R_RISCV_PCREL_STYPE relocation.
+	NEED_PCREL_STYPE_RELOC
+)
+
+// RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files
+// at https://github.com/riscv/riscv-opcodes.
+//
+// The table also includes some pseudo-mnemonics (e.g. MOV) used only in the assembler.
+//
+// See also "The RISC-V Instruction Set Manual" at https://riscv.org/specifications/.
+//
+// If you modify this table, you MUST run 'go generate' to regenerate anames.go!
+const (
+	// Unprivileged ISA (Document Version 20190608-Base-Ratified)
+
+	// 2.4: Integer Computational Instructions
+	AADDI = obj.ABaseRISCV + obj.A_ARCHSPECIFIC + iota
+	ASLTI
+	ASLTIU
+	AANDI
+	AORI
+	AXORI
+	ASLLI
+	ASRLI
+	ASRAI
+	ALUI
+	AAUIPC
+	AADD
+	ASLT
+	ASLTU
+	AAND
+	AOR
+	AXOR
+	ASLL
+	ASRL
+	ASUB
+	ASRA
+
+	// 2.5: Control Transfer Instructions
+	AJAL
+	AJALR
+	ABEQ
+	ABNE
+	ABLT
+	ABLTU
+	ABGE
+	ABGEU
+
+	// 2.6: Load and Store Instructions
+	ALW
+	ALWU
+	ALH
+	ALHU
+	ALB
+	ALBU
+	ASW
+	ASH
+	ASB
+
+	// 2.7: Memory Ordering Instructions
+	AFENCE
+	AFENCETSO
+	APAUSE
+
+	// 5.2: Integer Computational Instructions (RV64I)
+	AADDIW
+	ASLLIW
+	ASRLIW
+	ASRAIW
+	AADDW
+	ASLLW
+	ASRLW
+	ASUBW
+	ASRAW
+
+	// 5.3: Load and Store Instructions (RV64I)
+	ALD
+	ASD
+
+	// 7.1: Multiplication Operations
+	AMUL
+	AMULH
+	AMULHU
+	AMULHSU
+	AMULW
+	ADIV
+	ADIVU
+	AREM
+	AREMU
+	ADIVW
+	ADIVUW
+	AREMW
+	AREMUW
+
+	// 8.2: Load-Reserved/Store-Conditional Instructions
+	ALRD
+	ASCD
+	ALRW
+	ASCW
+
+	// 8.3: Atomic Memory Operations
+	AAMOSWAPD
+	AAMOADDD
+	AAMOANDD
+	AAMOORD
+	AAMOXORD
+	AAMOMAXD
+	AAMOMAXUD
+	AAMOMIND
+	AAMOMINUD
+	AAMOSWAPW
+	AAMOADDW
+	AAMOANDW
+	AAMOORW
+	AAMOXORW
+	AAMOMAXW
+	AAMOMAXUW
+	AAMOMINW
+	AAMOMINUW
+
+	// 10.1: Base Counters and Timers
+	ARDCYCLE
+	ARDCYCLEH
+	ARDTIME
+	ARDTIMEH
+	ARDINSTRET
+	ARDINSTRETH
+
+	// 11.2: Floating-Point Control and Status Register
+	AFRCSR
+	AFSCSR
+	AFRRM
+	AFSRM
+	AFRFLAGS
+	AFSFLAGS
+	AFSRMI
+	AFSFLAGSI
+
+	// 11.5: Single-Precision Load and Store Instructions
+	AFLW
+	AFSW
+
+	// 11.6: Single-Precision Floating-Point Computational Instructions
+	AFADDS
+	AFSUBS
+	AFMULS
+	AFDIVS
+	AFMINS
+	AFMAXS
+	AFSQRTS
+	AFMADDS
+	AFMSUBS
+	AFNMADDS
+	AFNMSUBS
+
+	// 11.7: Single-Precision Floating-Point Conversion and Move Instructions
+	AFCVTWS
+	AFCVTLS
+	AFCVTSW
+	AFCVTSL
+	AFCVTWUS
+	AFCVTLUS
+	AFCVTSWU
+	AFCVTSLU
+	AFSGNJS
+	AFSGNJNS
+	AFSGNJXS
+	AFMVXS
+	AFMVSX
+	AFMVXW
+	AFMVWX
+
+	// 11.8: Single-Precision Floating-Point Compare Instructions
+	AFEQS
+	AFLTS
+	AFLES
+
+	// 11.9: Single-Precision Floating-Point Classify Instruction
+	AFCLASSS
+
+	// 12.3: Double-Precision Load and Store Instructions
+	AFLD
+	AFSD
+
+	// 12.4: Double-Precision Floating-Point Computational Instructions
+	AFADDD
+	AFSUBD
+	AFMULD
+	AFDIVD
+	AFMIND
+	AFMAXD
+	AFSQRTD
+	AFMADDD
+	AFMSUBD
+	AFNMADDD
+	AFNMSUBD
+
+	// 12.5: Double-Precision Floating-Point Conversion and Move Instructions
+	AFCVTWD
+	AFCVTLD
+	AFCVTDW
+	AFCVTDL
+	AFCVTWUD
+	AFCVTLUD
+	AFCVTDWU
+	AFCVTDLU
+	AFCVTSD
+	AFCVTDS
+	AFSGNJD
+	AFSGNJND
+	AFSGNJXD
+	AFMVXD
+	AFMVDX
+
+	// 12.6: Double-Precision Floating-Point Compare Instructions
+	AFEQD
+	AFLTD
+	AFLED
+
+	// 12.7: Double-Precision Floating-Point Classify Instruction
+	AFCLASSD
+
+	// 13.1: Quad-Precision Load and Store Instructions
+	AFLQ
+	AFSQ
+
+	// 13.2: Quad-Precision Computational Instructions
+	AFADDQ
+	AFSUBQ
+	AFMULQ
+	AFDIVQ
+	AFMINQ
+	AFMAXQ
+	AFSQRTQ
+	AFMADDQ
+	AFMSUBQ
+	AFNMADDQ
+	AFNMSUBQ
+
+	// 13.3: Quad-Precision Convert and Move Instructions
+	AFCVTWQ
+	AFCVTLQ
+	AFCVTSQ
+	AFCVTDQ
+	AFCVTQW
+	AFCVTQL
+	AFCVTQS
+	AFCVTQD
+	AFCVTWUQ
+	AFCVTLUQ
+	AFCVTQWU
+	AFCVTQLU
+	AFSGNJQ
+	AFSGNJNQ
+	AFSGNJXQ
+
+	// 13.4: Quad-Precision Floating-Point Compare Instructions
+	AFEQQ
+	AFLEQ
+	AFLTQ
+
+	// 13.5: Quad-Precision Floating-Point Classify Instruction
+	AFCLASSQ
+
+	// Privileged ISA (Version 20190608-Priv-MSU-Ratified)
+
+	// 3.1.9: Instructions to Access CSRs
+	ACSRRW
+	ACSRRS
+	ACSRRC
+	ACSRRWI
+	ACSRRSI
+	ACSRRCI
+
+	// 3.2.1: Environment Call and Breakpoint
+	AECALL
+	ASCALL
+	AEBREAK
+	ASBREAK
+
+	// 3.2.2: Trap-Return Instructions
+	AMRET
+	ASRET
+	ADRET
+
+	// 3.2.3: Wait for Interrupt
+	AWFI
+
+	// 4.2.1: Supervisor Memory-Management Fence Instruction
+	ASFENCEVMA
+
+	// The escape hatch. Inserts a single 32-bit word.
+	AWORD
+
+	// Pseudo-instructions.  These get translated by the assembler into other
+	// instructions, based on their operands.
+	ABEQZ
+	ABGEZ
+	ABGT
+	ABGTU
+	ABGTZ
+	ABLE
+	ABLEU
+	ABLEZ
+	ABLTZ
+	ABNEZ
+	AFABSD
+	AFABSS
+	AFNEGD
+	AFNEGS
+	AFNED
+	AFNES
+	AMOV
+	AMOVB
+	AMOVBU
+	AMOVF
+	AMOVD
+	AMOVH
+	AMOVHU
+	AMOVW
+	AMOVWU
+	ANEG
+	ANEGW
+	ANOT
+	ASEQZ
+	ASNEZ
+
+	// End marker; ALAST is one past the last mnemonic above.
+	ALAST
+)
+
+// unaryDst lists all unary instructions which write to their argument (as
+// opposed to reading from it). The assembly parser uses this information to
+// populate its AST in a semantically reasonable way.
+//
+// Any instruction not listed here is assumed to either be non-unary or to
+// read from its argument.
+var unaryDst = map[obj.As]bool{
+	ARDCYCLE:    true,
+	ARDCYCLEH:   true,
+	ARDTIME:     true,
+	ARDTIMEH:    true,
+	ARDINSTRET:  true,
+	ARDINSTRETH: true,
+}
+
+// Instruction encoding masks. Each mask has a bit set for every immediate bit of its format.
+const (
+	// BTypeImmMask is a mask including only the immediate portion of
+	// B-type instructions (bits 31:25 and 11:7).
+	BTypeImmMask = 0xfe000f80
+
+	// CBTypeImmMask is a mask including only the immediate portion of
+	// CB-type instructions (bits 12:10 and 6:2).
+	CBTypeImmMask = 0x1c7c
+
+	// CJTypeImmMask is a mask including only the immediate portion of
+	// CJ-type instructions (the 11-bit jump target in bits 12:2).
+	CJTypeImmMask = 0x1ffc
+
+	// ITypeImmMask is a mask including only the immediate portion of
+	// I-type instructions (bits 31:20).
+	ITypeImmMask = 0xfff00000
+
+	// JTypeImmMask is a mask including only the immediate portion of
+	// J-type instructions (bits 31:12).
+	JTypeImmMask = 0xfffff000
+
+	// STypeImmMask is a mask including only the immediate portion of
+	// S-type instructions (bits 31:25 and 11:7).
+	STypeImmMask = 0xfe000f80
+
+	// UTypeImmMask is a mask including only the immediate portion of
+	// U-type instructions (bits 31:12).
+	UTypeImmMask = 0xfffff000
+)
diff --git a/src/cmd/internal/obj/riscv/inst.go b/src/cmd/internal/obj/riscv/inst.go
new file mode 100644
index 0000000..6cb11cd
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/inst.go
@@ -0,0 +1,442 @@
+// Code generated by parse.py -go rv64_a rv64_d rv64_f rv64_i rv64_m rv64_q rv_a rv_d rv_f rv_i rv_m rv_q rv_s rv_system rv_zicsr; DO NOT EDIT.
+package riscv
+
+import "cmd/internal/obj"
+
+type inst struct { // Fixed encoding fields for one mnemonic, as returned by encode.
+	opcode uint32 // opcode value, e.g. 0x33 for ADD and 0x13 for ADDI
+	funct3 uint32 // funct3 value, e.g. 0x7 for AND
+	rs2    uint32 // NOTE(review): appears to equal the low 5 bits of csr (e.g. DRET: 0x12) — confirm
+	csr    int64  // may be negative (e.g. -480 for FCLASSD); presumably a signed 12-bit field — confirm
+	funct7 uint32 // NOTE(review): appears to equal the high 7 bits of csr (e.g. DRET: 0x3d) — confirm
+}
+
+func encode(a obj.As) *inst {
+	switch a {
+	case AADD:
+		return &inst{0x33, 0x0, 0x0, 0, 0x0}
+	case AADDI:
+		return &inst{0x13, 0x0, 0x0, 0, 0x0}
+	case AADDIW:
+		return &inst{0x1b, 0x0, 0x0, 0, 0x0}
+	case AADDW:
+		return &inst{0x3b, 0x0, 0x0, 0, 0x0}
+	case AAMOADDD:
+		return &inst{0x2f, 0x3, 0x0, 0, 0x0}
+	case AAMOADDW:
+		return &inst{0x2f, 0x2, 0x0, 0, 0x0}
+	case AAMOANDD:
+		return &inst{0x2f, 0x3, 0x0, 1536, 0x30}
+	case AAMOANDW:
+		return &inst{0x2f, 0x2, 0x0, 1536, 0x30}
+	case AAMOMAXD:
+		return &inst{0x2f, 0x3, 0x0, -1536, 0x50}
+	case AAMOMAXW:
+		return &inst{0x2f, 0x2, 0x0, -1536, 0x50}
+	case AAMOMAXUD:
+		return &inst{0x2f, 0x3, 0x0, -512, 0x70}
+	case AAMOMAXUW:
+		return &inst{0x2f, 0x2, 0x0, -512, 0x70}
+	case AAMOMIND:
+		return &inst{0x2f, 0x3, 0x0, -2048, 0x40}
+	case AAMOMINW:
+		return &inst{0x2f, 0x2, 0x0, -2048, 0x40}
+	case AAMOMINUD:
+		return &inst{0x2f, 0x3, 0x0, -1024, 0x60}
+	case AAMOMINUW:
+		return &inst{0x2f, 0x2, 0x0, -1024, 0x60}
+	case AAMOORD:
+		return &inst{0x2f, 0x3, 0x0, 1024, 0x20}
+	case AAMOORW:
+		return &inst{0x2f, 0x2, 0x0, 1024, 0x20}
+	case AAMOSWAPD:
+		return &inst{0x2f, 0x3, 0x0, 128, 0x4}
+	case AAMOSWAPW:
+		return &inst{0x2f, 0x2, 0x0, 128, 0x4}
+	case AAMOXORD:
+		return &inst{0x2f, 0x3, 0x0, 512, 0x10}
+	case AAMOXORW:
+		return &inst{0x2f, 0x2, 0x0, 512, 0x10}
+	case AAND:
+		return &inst{0x33, 0x7, 0x0, 0, 0x0}
+	case AANDI:
+		return &inst{0x13, 0x7, 0x0, 0, 0x0}
+	case AAUIPC:
+		return &inst{0x17, 0x0, 0x0, 0, 0x0}
+	case ABEQ:
+		return &inst{0x63, 0x0, 0x0, 0, 0x0}
+	case ABGE:
+		return &inst{0x63, 0x5, 0x0, 0, 0x0}
+	case ABGEU:
+		return &inst{0x63, 0x7, 0x0, 0, 0x0}
+	case ABLT:
+		return &inst{0x63, 0x4, 0x0, 0, 0x0}
+	case ABLTU:
+		return &inst{0x63, 0x6, 0x0, 0, 0x0}
+	case ABNE:
+		return &inst{0x63, 0x1, 0x0, 0, 0x0}
+	case ACSRRC:
+		return &inst{0x73, 0x3, 0x0, 0, 0x0}
+	case ACSRRCI:
+		return &inst{0x73, 0x7, 0x0, 0, 0x0}
+	case ACSRRS:
+		return &inst{0x73, 0x2, 0x0, 0, 0x0}
+	case ACSRRSI:
+		return &inst{0x73, 0x6, 0x0, 0, 0x0}
+	case ACSRRW:
+		return &inst{0x73, 0x1, 0x0, 0, 0x0}
+	case ACSRRWI:
+		return &inst{0x73, 0x5, 0x0, 0, 0x0}
+	case ADIV:
+		return &inst{0x33, 0x4, 0x0, 32, 0x1}
+	case ADIVU:
+		return &inst{0x33, 0x5, 0x0, 32, 0x1}
+	case ADIVUW:
+		return &inst{0x3b, 0x5, 0x0, 32, 0x1}
+	case ADIVW:
+		return &inst{0x3b, 0x4, 0x0, 32, 0x1}
+	case ADRET:
+		return &inst{0x73, 0x0, 0x12, 1970, 0x3d}
+	case AEBREAK:
+		return &inst{0x73, 0x0, 0x1, 1, 0x0}
+	case AECALL:
+		return &inst{0x73, 0x0, 0x0, 0, 0x0}
+	case AFADDD:
+		return &inst{0x53, 0x0, 0x0, 32, 0x1}
+	case AFADDQ:
+		return &inst{0x53, 0x0, 0x0, 96, 0x3}
+	case AFADDS:
+		return &inst{0x53, 0x0, 0x0, 0, 0x0}
+	case AFCLASSD:
+		return &inst{0x53, 0x1, 0x0, -480, 0x71}
+	case AFCLASSQ:
+		return &inst{0x53, 0x1, 0x0, -416, 0x73}
+	case AFCLASSS:
+		return &inst{0x53, 0x1, 0x0, -512, 0x70}
+	case AFCVTDL:
+		return &inst{0x53, 0x0, 0x2, -734, 0x69}
+	case AFCVTDLU:
+		return &inst{0x53, 0x0, 0x3, -733, 0x69}
+	case AFCVTDQ:
+		return &inst{0x53, 0x0, 0x3, 1059, 0x21}
+	case AFCVTDS:
+		return &inst{0x53, 0x0, 0x0, 1056, 0x21}
+	case AFCVTDW:
+		return &inst{0x53, 0x0, 0x0, -736, 0x69}
+	case AFCVTDWU:
+		return &inst{0x53, 0x0, 0x1, -735, 0x69}
+	case AFCVTLD:
+		return &inst{0x53, 0x0, 0x2, -990, 0x61}
+	case AFCVTLQ:
+		return &inst{0x53, 0x0, 0x2, -926, 0x63}
+	case AFCVTLS:
+		return &inst{0x53, 0x0, 0x2, -1022, 0x60}
+	case AFCVTLUD:
+		return &inst{0x53, 0x0, 0x3, -989, 0x61}
+	case AFCVTLUQ:
+		return &inst{0x53, 0x0, 0x3, -925, 0x63}
+	case AFCVTLUS:
+		return &inst{0x53, 0x0, 0x3, -1021, 0x60}
+	case AFCVTQD:
+		return &inst{0x53, 0x0, 0x1, 1121, 0x23}
+	case AFCVTQL:
+		return &inst{0x53, 0x0, 0x2, -670, 0x6b}
+	case AFCVTQLU:
+		return &inst{0x53, 0x0, 0x3, -669, 0x6b}
+	case AFCVTQS:
+		return &inst{0x53, 0x0, 0x0, 1120, 0x23}
+	case AFCVTQW:
+		return &inst{0x53, 0x0, 0x0, -672, 0x6b}
+	case AFCVTQWU:
+		return &inst{0x53, 0x0, 0x1, -671, 0x6b}
+	case AFCVTSD:
+		return &inst{0x53, 0x0, 0x1, 1025, 0x20}
+	case AFCVTSL:
+		return &inst{0x53, 0x0, 0x2, -766, 0x68}
+	case AFCVTSLU:
+		return &inst{0x53, 0x0, 0x3, -765, 0x68}
+	case AFCVTSQ:
+		return &inst{0x53, 0x0, 0x3, 1027, 0x20}
+	case AFCVTSW:
+		return &inst{0x53, 0x0, 0x0, -768, 0x68}
+	case AFCVTSWU:
+		return &inst{0x53, 0x0, 0x1, -767, 0x68}
+	case AFCVTWD:
+		return &inst{0x53, 0x0, 0x0, -992, 0x61}
+	case AFCVTWQ:
+		return &inst{0x53, 0x0, 0x0, -928, 0x63}
+	case AFCVTWS:
+		return &inst{0x53, 0x0, 0x0, -1024, 0x60}
+	case AFCVTWUD:
+		return &inst{0x53, 0x0, 0x1, -991, 0x61}
+	case AFCVTWUQ:
+		return &inst{0x53, 0x0, 0x1, -927, 0x63}
+	case AFCVTWUS:
+		return &inst{0x53, 0x0, 0x1, -1023, 0x60}
+	case AFDIVD:
+		return &inst{0x53, 0x0, 0x0, 416, 0xd}
+	case AFDIVQ:
+		return &inst{0x53, 0x0, 0x0, 480, 0xf}
+	case AFDIVS:
+		return &inst{0x53, 0x0, 0x0, 384, 0xc}
+	case AFENCE:
+		return &inst{0xf, 0x0, 0x0, 0, 0x0}
+	case AFENCETSO:
+		return &inst{0xf, 0x0, 0x13, -1997, 0x41}
+	case AFEQD:
+		return &inst{0x53, 0x2, 0x0, -1504, 0x51}
+	case AFEQQ:
+		return &inst{0x53, 0x2, 0x0, -1440, 0x53}
+	case AFEQS:
+		return &inst{0x53, 0x2, 0x0, -1536, 0x50}
+	case AFLD:
+		return &inst{0x7, 0x3, 0x0, 0, 0x0}
+	case AFLED:
+		return &inst{0x53, 0x0, 0x0, -1504, 0x51}
+	case AFLEQ:
+		return &inst{0x53, 0x0, 0x0, -1440, 0x53}
+	case AFLES:
+		return &inst{0x53, 0x0, 0x0, -1536, 0x50}
+	case AFLQ:
+		return &inst{0x7, 0x4, 0x0, 0, 0x0}
+	case AFLTD:
+		return &inst{0x53, 0x1, 0x0, -1504, 0x51}
+	case AFLTQ:
+		return &inst{0x53, 0x1, 0x0, -1440, 0x53}
+	case AFLTS:
+		return &inst{0x53, 0x1, 0x0, -1536, 0x50}
+	case AFLW:
+		return &inst{0x7, 0x2, 0x0, 0, 0x0}
+	case AFMADDD:
+		return &inst{0x43, 0x0, 0x0, 32, 0x1}
+	case AFMADDQ:
+		return &inst{0x43, 0x0, 0x0, 96, 0x3}
+	case AFMADDS:
+		return &inst{0x43, 0x0, 0x0, 0, 0x0}
+	case AFMAXD:
+		return &inst{0x53, 0x1, 0x0, 672, 0x15}
+	case AFMAXQ:
+		return &inst{0x53, 0x1, 0x0, 736, 0x17}
+	case AFMAXS:
+		return &inst{0x53, 0x1, 0x0, 640, 0x14}
+	case AFMIND:
+		return &inst{0x53, 0x0, 0x0, 672, 0x15}
+	case AFMINQ:
+		return &inst{0x53, 0x0, 0x0, 736, 0x17}
+	case AFMINS:
+		return &inst{0x53, 0x0, 0x0, 640, 0x14}
+	case AFMSUBD:
+		return &inst{0x47, 0x0, 0x0, 32, 0x1}
+	case AFMSUBQ:
+		return &inst{0x47, 0x0, 0x0, 96, 0x3}
+	case AFMSUBS:
+		return &inst{0x47, 0x0, 0x0, 0, 0x0}
+	case AFMULD:
+		return &inst{0x53, 0x0, 0x0, 288, 0x9}
+	case AFMULQ:
+		return &inst{0x53, 0x0, 0x0, 352, 0xb}
+	case AFMULS:
+		return &inst{0x53, 0x0, 0x0, 256, 0x8}
+	case AFMVDX:
+		return &inst{0x53, 0x0, 0x0, -224, 0x79}
+	case AFMVSX:
+		return &inst{0x53, 0x0, 0x0, -256, 0x78}
+	case AFMVWX:
+		return &inst{0x53, 0x0, 0x0, -256, 0x78}
+	case AFMVXD:
+		return &inst{0x53, 0x0, 0x0, -480, 0x71}
+	case AFMVXS:
+		return &inst{0x53, 0x0, 0x0, -512, 0x70}
+	case AFMVXW:
+		return &inst{0x53, 0x0, 0x0, -512, 0x70}
+	case AFNMADDD:
+		return &inst{0x4f, 0x0, 0x0, 32, 0x1}
+	case AFNMADDQ:
+		return &inst{0x4f, 0x0, 0x0, 96, 0x3}
+	case AFNMADDS:
+		return &inst{0x4f, 0x0, 0x0, 0, 0x0}
+	case AFNMSUBD:
+		return &inst{0x4b, 0x0, 0x0, 32, 0x1}
+	case AFNMSUBQ:
+		return &inst{0x4b, 0x0, 0x0, 96, 0x3}
+	case AFNMSUBS:
+		return &inst{0x4b, 0x0, 0x0, 0, 0x0}
+	case AFRCSR:
+		return &inst{0x73, 0x2, 0x3, 3, 0x0}
+	case AFRFLAGS:
+		return &inst{0x73, 0x2, 0x1, 1, 0x0}
+	case AFRRM:
+		return &inst{0x73, 0x2, 0x2, 2, 0x0}
+	case AFSCSR:
+		return &inst{0x73, 0x1, 0x3, 3, 0x0}
+	case AFSD:
+		return &inst{0x27, 0x3, 0x0, 0, 0x0}
+	case AFSFLAGS:
+		return &inst{0x73, 0x1, 0x1, 1, 0x0}
+	case AFSFLAGSI:
+		return &inst{0x73, 0x5, 0x1, 1, 0x0}
+	case AFSGNJD:
+		return &inst{0x53, 0x0, 0x0, 544, 0x11}
+	case AFSGNJQ:
+		return &inst{0x53, 0x0, 0x0, 608, 0x13}
+	case AFSGNJS:
+		return &inst{0x53, 0x0, 0x0, 512, 0x10}
+	case AFSGNJND:
+		return &inst{0x53, 0x1, 0x0, 544, 0x11}
+	case AFSGNJNQ:
+		return &inst{0x53, 0x1, 0x0, 608, 0x13}
+	case AFSGNJNS:
+		return &inst{0x53, 0x1, 0x0, 512, 0x10}
+	case AFSGNJXD:
+		return &inst{0x53, 0x2, 0x0, 544, 0x11}
+	case AFSGNJXQ:
+		return &inst{0x53, 0x2, 0x0, 608, 0x13}
+	case AFSGNJXS:
+		return &inst{0x53, 0x2, 0x0, 512, 0x10}
+	case AFSQ:
+		return &inst{0x27, 0x4, 0x0, 0, 0x0}
+	case AFSQRTD:
+		return &inst{0x53, 0x0, 0x0, 1440, 0x2d}
+	case AFSQRTQ:
+		return &inst{0x53, 0x0, 0x0, 1504, 0x2f}
+	case AFSQRTS:
+		return &inst{0x53, 0x0, 0x0, 1408, 0x2c}
+	case AFSRM:
+		return &inst{0x73, 0x1, 0x2, 2, 0x0}
+	case AFSRMI:
+		return &inst{0x73, 0x5, 0x2, 2, 0x0}
+	case AFSUBD:
+		return &inst{0x53, 0x0, 0x0, 160, 0x5}
+	case AFSUBQ:
+		return &inst{0x53, 0x0, 0x0, 224, 0x7}
+	case AFSUBS:
+		return &inst{0x53, 0x0, 0x0, 128, 0x4}
+	case AFSW:
+		return &inst{0x27, 0x2, 0x0, 0, 0x0}
+	case AJAL:
+		return &inst{0x6f, 0x0, 0x0, 0, 0x0}
+	case AJALR:
+		return &inst{0x67, 0x0, 0x0, 0, 0x0}
+	case ALB:
+		return &inst{0x3, 0x0, 0x0, 0, 0x0}
+	case ALBU:
+		return &inst{0x3, 0x4, 0x0, 0, 0x0}
+	case ALD:
+		return &inst{0x3, 0x3, 0x0, 0, 0x0}
+	case ALH:
+		return &inst{0x3, 0x1, 0x0, 0, 0x0}
+	case ALHU:
+		return &inst{0x3, 0x5, 0x0, 0, 0x0}
+	case ALRD:
+		return &inst{0x2f, 0x3, 0x0, 256, 0x8}
+	case ALRW:
+		return &inst{0x2f, 0x2, 0x0, 256, 0x8}
+	case ALUI:
+		return &inst{0x37, 0x0, 0x0, 0, 0x0}
+	case ALW:
+		return &inst{0x3, 0x2, 0x0, 0, 0x0}
+	case ALWU:
+		return &inst{0x3, 0x6, 0x0, 0, 0x0}
+	case AMRET:
+		return &inst{0x73, 0x0, 0x2, 770, 0x18}
+	case AMUL:
+		return &inst{0x33, 0x0, 0x0, 32, 0x1}
+	case AMULH:
+		return &inst{0x33, 0x1, 0x0, 32, 0x1}
+	case AMULHSU:
+		return &inst{0x33, 0x2, 0x0, 32, 0x1}
+	case AMULHU:
+		return &inst{0x33, 0x3, 0x0, 32, 0x1}
+	case AMULW:
+		return &inst{0x3b, 0x0, 0x0, 32, 0x1}
+	case AOR:
+		return &inst{0x33, 0x6, 0x0, 0, 0x0}
+	case AORI:
+		return &inst{0x13, 0x6, 0x0, 0, 0x0}
+	case APAUSE:
+		return &inst{0xf, 0x0, 0x10, 16, 0x0}
+	case ARDCYCLE:
+		return &inst{0x73, 0x2, 0x0, -1024, 0x60}
+	case ARDCYCLEH:
+		return &inst{0x73, 0x2, 0x0, -896, 0x64}
+	case ARDINSTRET:
+		return &inst{0x73, 0x2, 0x2, -1022, 0x60}
+	case ARDINSTRETH:
+		return &inst{0x73, 0x2, 0x2, -894, 0x64}
+	case ARDTIME:
+		return &inst{0x73, 0x2, 0x1, -1023, 0x60}
+	case ARDTIMEH:
+		return &inst{0x73, 0x2, 0x1, -895, 0x64}
+	case AREM:
+		return &inst{0x33, 0x6, 0x0, 32, 0x1}
+	case AREMU:
+		return &inst{0x33, 0x7, 0x0, 32, 0x1}
+	case AREMUW:
+		return &inst{0x3b, 0x7, 0x0, 32, 0x1}
+	case AREMW:
+		return &inst{0x3b, 0x6, 0x0, 32, 0x1}
+	case ASB:
+		return &inst{0x23, 0x0, 0x0, 0, 0x0}
+	case ASBREAK:
+		return &inst{0x73, 0x0, 0x1, 1, 0x0}
+	case ASCD:
+		return &inst{0x2f, 0x3, 0x0, 384, 0xc}
+	case ASCW:
+		return &inst{0x2f, 0x2, 0x0, 384, 0xc}
+	case ASCALL:
+		return &inst{0x73, 0x0, 0x0, 0, 0x0}
+	case ASD:
+		return &inst{0x23, 0x3, 0x0, 0, 0x0}
+	case ASFENCEVMA:
+		return &inst{0x73, 0x0, 0x0, 288, 0x9}
+	case ASH:
+		return &inst{0x23, 0x1, 0x0, 0, 0x0}
+	case ASLL:
+		return &inst{0x33, 0x1, 0x0, 0, 0x0}
+	case ASLLI:
+		return &inst{0x13, 0x1, 0x0, 0, 0x0}
+	case ASLLIW:
+		return &inst{0x1b, 0x1, 0x0, 0, 0x0}
+	case ASLLW:
+		return &inst{0x3b, 0x1, 0x0, 0, 0x0}
+	case ASLT:
+		return &inst{0x33, 0x2, 0x0, 0, 0x0}
+	case ASLTI:
+		return &inst{0x13, 0x2, 0x0, 0, 0x0}
+	case ASLTIU:
+		return &inst{0x13, 0x3, 0x0, 0, 0x0}
+	case ASLTU:
+		return &inst{0x33, 0x3, 0x0, 0, 0x0}
+	case ASRA:
+		return &inst{0x33, 0x5, 0x0, 1024, 0x20}
+	case ASRAI:
+		return &inst{0x13, 0x5, 0x0, 1024, 0x20}
+	case ASRAIW:
+		return &inst{0x1b, 0x5, 0x0, 1024, 0x20}
+	case ASRAW:
+		return &inst{0x3b, 0x5, 0x0, 1024, 0x20}
+	case ASRET:
+		return &inst{0x73, 0x0, 0x2, 258, 0x8}
+	case ASRL:
+		return &inst{0x33, 0x5, 0x0, 0, 0x0}
+	case ASRLI:
+		return &inst{0x13, 0x5, 0x0, 0, 0x0}
+	case ASRLIW:
+		return &inst{0x1b, 0x5, 0x0, 0, 0x0}
+	case ASRLW:
+		return &inst{0x3b, 0x5, 0x0, 0, 0x0}
+	case ASUB:
+		return &inst{0x33, 0x0, 0x0, 1024, 0x20}
+	case ASUBW:
+		return &inst{0x3b, 0x0, 0x0, 1024, 0x20}
+	case ASW:
+		return &inst{0x23, 0x2, 0x0, 0, 0x0}
+	case AWFI:
+		return &inst{0x73, 0x0, 0x5, 261, 0x8}
+	case AXOR:
+		return &inst{0x33, 0x4, 0x0, 0, 0x0}
+	case AXORI:
+		return &inst{0x13, 0x4, 0x0, 0, 0x0}
+	}
+	return nil
+}
diff --git a/src/cmd/internal/obj/riscv/list.go b/src/cmd/internal/obj/riscv/list.go
new file mode 100644
index 0000000..de90961
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/list.go
@@ -0,0 +1,33 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package riscv
+
+import (
+	"fmt"
+
+	"cmd/internal/obj"
+)
+
+func init() { // runs at package initialization: hooks the RISC-V name tables into package obj
+	obj.RegisterRegister(obj.RBaseRISCV, REG_END, RegName) // RegName names the registers in the range bounded by obj.RBaseRISCV and REG_END
+	obj.RegisterOpcode(obj.ABaseRISCV, Anames)             // Anames supplies the opcode names, based at obj.ABaseRISCV
+}
+
+// RegName returns the assembler spelling of register number r.
+func RegName(r int) string {
+	switch {
+	case r == 0:
+		return "NONE"
+	case r == REG_G: // the named registers are tested before the generic X<n> form
+		return "g"
+	case r == REG_SP:
+		return "SP"
+	case r >= REG_X0 && r <= REG_X31:
+		return fmt.Sprintf("X%d", r-REG_X0)
+	case r >= REG_F0 && r <= REG_F31:
+		return fmt.Sprintf("F%d", r-REG_F0)
+	}
+	return fmt.Sprintf("Rgok(%d)", r-obj.RBaseRISCV) // anything else prints relative to the RISC-V register base
+}
diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go
new file mode 100644
index 0000000..11d6c20
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/obj.go
@@ -0,0 +1,2489 @@
+// Copyright © 2015 The Go Authors.  All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package riscv
+
+import (
+	"cmd/internal/obj"
+	"cmd/internal/objabi"
+	"cmd/internal/sys"
+	"fmt"
+	"internal/abi"
+	"log"
+	"math/bits"
+)
+
+func buildop(ctxt *obj.Link) {} // no-op: accepts the link context and does nothing with it
+
+// jalToSym turns a CALL, JMP, RET, DUFFZERO or DUFFCOPY Prog into a JAL that
+// links through lr and is marked NEED_JAL_RELOC; any other Prog is diagnosed.
+func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) {
+	switch p.As {
+	case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY:
+		p.As = AJAL
+		p.Mark |= NEED_JAL_RELOC
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = lr
+		p.Reg = obj.REG_NONE
+	default:
+		ctxt.Diag("unexpected Prog in jalToSym: %v", p)
+	}
+}
+
+// progedit is called individually for each *obj.Prog. It normalizes instruction
+// formats and eliminates as many pseudo-instructions as possible; newprog is unused.
+func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
+
+	// Expand two-operand forms to three-operand ones by reusing the destination register as the second source.
+	if p.Reg == obj.REG_NONE {
+		switch p.As {
+		case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI,
+			AADDIW, ASLLIW, ASRLIW, ASRAIW, AADDW, ASUBW, ASLLW, ASRLW, ASRAW,
+			AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA,
+			AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW,
+			AREM, AREMU, AREMW, AREMUW:
+			p.Reg = p.To.Reg
+		}
+	}
+
+	// Rewrite instructions with constant operands to refer to the immediate
+	// form of the instruction. Opcodes not listed here are left unchanged.
+	if p.From.Type == obj.TYPE_CONST {
+		switch p.As {
+		case AADD:
+			p.As = AADDI
+		case ASUB:
+			p.As, p.From.Offset = AADDI, -p.From.Offset // SUB $c is emitted as ADDI $-c
+		case ASLT:
+			p.As = ASLTI
+		case ASLTU:
+			p.As = ASLTIU
+		case AAND:
+			p.As = AANDI
+		case AOR:
+			p.As = AORI
+		case AXOR:
+			p.As = AXORI
+		case ASLL:
+			p.As = ASLLI
+		case ASRL:
+			p.As = ASRLI
+		case ASRA:
+			p.As = ASRAI
+		case AADDW:
+			p.As = AADDIW
+		case ASUBW:
+			p.As, p.From.Offset = AADDIW, -p.From.Offset // SUBW $c is emitted as ADDIW $-c
+		case ASLLW:
+			p.As = ASLLIW
+		case ASRLW:
+			p.As = ASRLIW
+		case ASRAW:
+			p.As = ASRAIW
+		}
+	}
+
+	switch p.As {
+	case obj.AJMP:
+		// Turn JMP into JAL ZERO or JALR ZERO.
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_ZERO
+
+		switch p.To.Type {
+		case obj.TYPE_BRANCH:
+			p.As = AJAL
+		case obj.TYPE_MEM:
+			switch p.To.Name {
+			case obj.NAME_NONE:
+				p.As = AJALR
+			case obj.NAME_EXTERN, obj.NAME_STATIC:
+				// Left as JMP here; preprocess converts it with jalToSym.
+			default:
+				ctxt.Diag("unsupported name %d for %v", p.To.Name, p)
+			}
+		default:
+			panic(fmt.Sprintf("unhandled type %+v", p.To.Type))
+		}
+
+	case obj.ACALL:
+		switch p.To.Type {
+		case obj.TYPE_MEM:
+			// Left as CALL here; preprocess converts it with jalToSym.
+		case obj.TYPE_REG:
+			p.As = AJALR
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = REG_LR
+		default:
+			ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p)
+		}
+
+	case obj.AUNDEF:
+		p.As = AEBREAK
+
+	case ASCALL:
+		// SCALL is the old name for ECALL.
+		p.As = AECALL
+
+	case ASBREAK:
+		// SBREAK is the old name for EBREAK.
+		p.As = AEBREAK
+
+	case AMOV:
+		if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE && int64(int32(p.From.Offset)) != p.From.Offset {
+			ctz := bits.TrailingZeros64(uint64(p.From.Offset)) // plain constant that does not fit in a signed 32-bit value
+			val := p.From.Offset >> ctz
+			if int64(int32(val)) == val {
+				// It's ok. We can handle constants with many trailing zeros.
+				break
+			}
+			// Put >32-bit constants in memory and load them.
+			p.From.Type = obj.TYPE_MEM
+			p.From.Sym = ctxt.Int64Sym(p.From.Offset)
+			p.From.Name = obj.NAME_EXTERN
+			p.From.Offset = 0
+		}
+	}
+}
+
+// addrToReg returns the register that a refers to. Operands named PARAM or
+// AUTO always yield REG_SP, whatever a.Reg holds; all others yield a.Reg.
+func addrToReg(a obj.Addr) int16 {
+	if a.Name == obj.NAME_PARAM || a.Name == obj.NAME_AUTO {
+		return REG_SP
+	}
+	return a.Reg
+}
+
+// movToLoad maps a MOV pseudo-instruction to the load opcode of matching
+// width and signedness. It panics if mnemonic is not one of the MOVs below.
+func movToLoad(mnemonic obj.As) obj.As {
+	switch mnemonic {
+	case AMOVB:
+		return ALB
+	case AMOVBU:
+		return ALBU
+	case AMOVH:
+		return ALH
+	case AMOVHU:
+		return ALHU
+	case AMOVW:
+		return ALW
+	case AMOVWU:
+		return ALWU
+	case AMOV:
+		return ALD
+	case AMOVF:
+		return AFLW
+	case AMOVD:
+		return AFLD
+	}
+	panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
+}
+
+// movToStore maps a MOV pseudo-instruction to the store opcode of matching
+// width. Anything else, including the unsigned MOVs, panics as "not a MOV".
+func movToStore(mnemonic obj.As) obj.As {
+	switch mnemonic {
+	case AMOVB:
+		return ASB
+	case AMOVH:
+		return ASH
+	case AMOVW:
+		return ASW
+	case AMOV:
+		return ASD
+	case AMOVF:
+		return AFSW
+	case AMOVD:
+		return AFSD
+	}
+	panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
+}
+
+// markRelocs marks an obj.Prog that specifies a MOV pseudo-instruction and
+// requires relocation. A NAME_EXTERN/NAME_STATIC source that is loaded or
+// address-taken into a register gets the I-type mark; such a destination of
+// a register store gets the S-type mark. Every other Prog is left untouched.
+func markRelocs(p *obj.Prog) {
+	switch p.As {
+	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
+	default:
+		return
+	}
+	src, dst := &p.From, &p.To
+	srcIsSym := src.Name == obj.NAME_EXTERN || src.Name == obj.NAME_STATIC
+	dstIsSym := dst.Name == obj.NAME_EXTERN || dst.Name == obj.NAME_STATIC
+	switch {
+	case dst.Type == obj.TYPE_REG && (src.Type == obj.TYPE_ADDR || src.Type == obj.TYPE_MEM):
+		if srcIsSym {
+			p.Mark |= NEED_PCREL_ITYPE_RELOC
+		}
+	case src.Type == obj.TYPE_REG && dst.Type == obj.TYPE_MEM:
+		if dstIsSym {
+			p.Mark |= NEED_PCREL_STYPE_RELOC
+		}
+	}
+}
+
+// InvertBranch returns the conditional branch that is taken exactly when as
+// is not taken. It panics if as is not one of the branches listed below.
+func InvertBranch(as obj.As) obj.As {
+	switch as {
+	case ABEQ:
+		return ABNE
+	case ABNE:
+		return ABEQ
+	case ABEQZ:
+		return ABNEZ
+	case ABNEZ:
+		return ABEQZ
+	case ABGE:
+		return ABLT
+	case ABLT:
+		return ABGE
+	case ABGEU:
+		return ABLTU
+	case ABLTU:
+		return ABGEU
+	case ABGEZ:
+		return ABLTZ
+	case ABLTZ:
+		return ABGEZ
+	case ABGT:
+		return ABLE
+	case ABLE:
+		return ABGT
+	case ABGTU:
+		return ABLEU
+	case ABLEU:
+		return ABGTU
+	case ABGTZ:
+		return ABLEZ
+	case ABLEZ:
+		return ABGTZ
+	}
+	panic("InvertBranch: not a branch")
+}
+
+// containsCall reports whether the symbol contains a CALL (or equivalent)
+// instruction: CALL, DUFFZERO or DUFFCOPY, or a JAL/JALR whose link register
+// is LR. Must be called after progedit.
+func containsCall(sym *obj.LSym) bool {
+	for p := sym.Func().Text; p != nil; p = p.Link {
+		if p.As == obj.ACALL || p.As == obj.ADUFFZERO || p.As == obj.ADUFFCOPY {
+			return true
+		}
+		// A JAL or JALR only counts as a call when its From operand is register LR.
+		linksLR := p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR
+		if (p.As == AJAL || p.As == AJALR) && linksLR {
+			return true
+		}
+	}
+
+	return false
+}
+
+// setPCs assigns addresses to the Prog list starting at p: each Prog.Pc is set
+// to the running pc, which then advances by the length of the Prog's machine
+// instructions plus any PCALIGN padding. It returns the next available pc.
+func setPCs(p *obj.Prog, pc int64) int64 {
+	for cur := p; cur != nil; cur = cur.Link {
+		cur.Pc = pc
+		for _, ins := range instructionsForProg(cur) {
+			pc += int64(ins.length())
+		}
+		if cur.As != obj.APCALIGN {
+			continue
+		}
+		// From.Offset of a PCALIGN holds the requested alignment.
+		pc += int64(pcAlignPadLength(pc, cur.From.Offset))
+	}
+	return pc
+}
+
+// stackOffset adds the current stack size to the offset of an AUTO or PARAM
+// operand; operands with any other Name are left unchanged.
+//
+// The stack looks like:
+// -------------------
+// |                 |
+// |      PARAMs     |
+// |                 |
+// |                 |
+// -------------------
+// |    Parent RA    |   SP on function entry
+// -------------------
+// |                 |
+// |                 |
+// |       AUTOs     |
+// |                 |
+// |                 |
+// -------------------
+// |        RA       |   SP during function execution
+// -------------------
+//
+// FixedFrameSize makes other packages aware of the space allocated for RA.
+//
+// A nicer version of this diagram can be found on slide 21 of the presentation
+// attached to https://golang.org/issue/16922#issuecomment-243748180.
+func stackOffset(a *obj.Addr, stacksize int64) {
+	if a.Name == obj.NAME_AUTO {
+		// Adjust to the top of AUTOs.
+		a.Offset += stacksize
+	} else if a.Name == obj.NAME_PARAM {
+		// Adjust to the bottom of PARAMs (the extra 8 presumably steps over the Parent RA slot).
+		a.Offset += stacksize + 8
+	}
+}
+
+// preprocess generates prologue and epilogue code, computes PC-relative branch
+// and jump offsets, and resolves pseudo-registers.
+//
+// preprocess is called once per linker symbol.
+//
+// When preprocess finishes, all instructions in the symbol are either
+// concrete, real RISC-V instructions or directive pseudo-ops like TEXT,
+// PCDATA, and FUNCDATA.
+func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
+	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
+		return
+	}
+
+	// Generate the prologue.
+	text := cursym.Func().Text
+	if text.As != obj.ATEXT {
+		ctxt.Diag("preprocess: found symbol that does not start with TEXT directive")
+		return
+	}
+
+	stacksize := text.To.Offset
+	if stacksize == -8 {
+		// Historical way to mark NOFRAME.
+		text.From.Sym.Set(obj.AttrNoFrame, true)
+		stacksize = 0
+	}
+	if stacksize < 0 {
+		ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize)
+	}
+	if text.From.Sym.NoFrame() {
+		if stacksize != 0 {
+			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize)
+		}
+	}
+
+	if !containsCall(cursym) {
+		text.From.Sym.Set(obj.AttrLeaf, true)
+		if stacksize == 0 {
+			// A leaf function with no locals has no frame.
+			text.From.Sym.Set(obj.AttrNoFrame, true)
+		}
+	}
+
+	// Save LR unless there is no frame.
+	if !text.From.Sym.NoFrame() {
+		stacksize += ctxt.Arch.FixedFrameSize
+	}
+
+	cursym.Func().Args = text.To.Val.(int32)
+	cursym.Func().Locals = int32(stacksize)
+
+	prologue := text
+
+	if !cursym.Func().Text.From.Sym.NoSplit() {
+		prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check
+	}
+
+	if stacksize != 0 {
+		prologue = ctxt.StartUnsafePoint(prologue, newprog)
+
+		// Actually save LR.
+		prologue = obj.Appendp(prologue, newprog)
+		prologue.As = AMOV
+		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize}
+
+		// Insert stack adjustment.
+		prologue = obj.Appendp(prologue, newprog)
+		prologue.As = AADDI
+		prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize}
+		prologue.Reg = REG_SP
+		prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
+		prologue.Spadj = int32(stacksize)
+
+		prologue = ctxt.EndUnsafePoint(prologue, newprog, -1)
+
+		// On Linux, in a cgo binary we may get a SIGSETXID signal early on
+		// before the signal stack is set, as glibc doesn't allow us to block
+		// SIGSETXID. So a signal may land on the current stack and clobber
+		// the content below the SP. We store the LR again after the SP is
+		// decremented.
+		prologue = obj.Appendp(prologue, newprog)
+		prologue.As = AMOV
+		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
+	}
+
+	if cursym.Func().Text.From.Sym.Wrapper() {
+		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
+		//
+		//   MOV g_panic(g), X5
+		//   BNE X5, ZERO, adjust
+		// end:
+		//   NOP
+		// ...rest of function..
+		// adjust:
+		//   MOV panic_argp(X5), X6
+		//   ADD $(autosize+FIXED_FRAME), SP, X7
+		//   BNE X6, X7, end
+		//   ADD $FIXED_FRAME, SP, X6
+		//   MOV X6, panic_argp(X5)
+		//   JMP end
+		//
+		// The NOP is needed to give the jumps somewhere to land.
+
+		ldpanic := obj.Appendp(prologue, newprog)
+
+		ldpanic.As = AMOV
+		ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic
+		ldpanic.Reg = obj.REG_NONE
+		ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
+
+		bneadj := obj.Appendp(ldpanic, newprog)
+		bneadj.As = ABNE
+		bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
+		bneadj.Reg = REG_ZERO
+		bneadj.To.Type = obj.TYPE_BRANCH
+
+		endadj := obj.Appendp(bneadj, newprog)
+		endadj.As = obj.ANOP
+
+		last := endadj
+		for last.Link != nil {
+			last = last.Link
+		}
+
+		getargp := obj.Appendp(last, newprog)
+		getargp.As = AMOV
+		getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
+		getargp.Reg = obj.REG_NONE
+		getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
+
+		bneadj.To.SetTarget(getargp)
+
+		calcargp := obj.Appendp(getargp, newprog)
+		calcargp.As = AADDI
+		calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.Arch.FixedFrameSize}
+		calcargp.Reg = REG_SP
+		calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X7}
+
+		testargp := obj.Appendp(calcargp, newprog)
+		testargp.As = ABNE
+		testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
+		testargp.Reg = REG_X7
+		testargp.To.Type = obj.TYPE_BRANCH
+		testargp.To.SetTarget(endadj)
+
+		adjargp := obj.Appendp(testargp, newprog)
+		adjargp.As = AADDI
+		adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)}
+		adjargp.Reg = REG_SP
+		adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
+
+		setargp := obj.Appendp(adjargp, newprog)
+		setargp.As = AMOV
+		setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
+		setargp.Reg = obj.REG_NONE
+		setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
+
+		godone := obj.Appendp(setargp, newprog)
+		godone.As = AJAL
+		godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
+		godone.To.Type = obj.TYPE_BRANCH
+		godone.To.SetTarget(endadj)
+	}
+
+	// Update stack-based offsets.
+	for p := cursym.Func().Text; p != nil; p = p.Link {
+		stackOffset(&p.From, stacksize)
+		stackOffset(&p.To, stacksize)
+	}
+
+	// Additional instruction rewriting.
+	for p := cursym.Func().Text; p != nil; p = p.Link {
+		switch p.As {
+		case obj.AGETCALLERPC:
+			if cursym.Leaf() {
+				// MOV LR, Rd
+				p.As = AMOV
+				p.From.Type = obj.TYPE_REG
+				p.From.Reg = REG_LR
+			} else {
+				// MOV (SP), Rd
+				p.As = AMOV
+				p.From.Type = obj.TYPE_MEM
+				p.From.Reg = REG_SP
+			}
+
+		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
+			switch p.To.Type {
+			case obj.TYPE_MEM:
+				jalToSym(ctxt, p, REG_LR)
+			}
+
+		case obj.AJMP:
+			switch p.To.Type {
+			case obj.TYPE_MEM:
+				switch p.To.Name {
+				case obj.NAME_EXTERN, obj.NAME_STATIC:
+					jalToSym(ctxt, p, REG_ZERO)
+				}
+			}
+
+		case obj.ARET:
+			// Replace RET with epilogue.
+			retJMP := p.To.Sym
+
+			if stacksize != 0 {
+				// Restore LR.
+				p.As = AMOV
+				p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
+				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+				p = obj.Appendp(p, newprog)
+
+				p.As = AADDI
+				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize}
+				p.Reg = REG_SP
+				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
+				p.Spadj = int32(-stacksize)
+				p = obj.Appendp(p, newprog)
+			}
+
+			if retJMP != nil {
+				p.As = obj.ARET
+				p.To.Sym = retJMP
+				jalToSym(ctxt, p, REG_ZERO)
+			} else {
+				p.As = AJALR
+				p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
+				p.Reg = obj.REG_NONE
+				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+			}
+
+			// "Add back" the stack removed in the previous instruction.
+			//
+			// This is to avoid confusing pctospadj, which sums
+			// Spadj from function entry to each PC, and shouldn't
+			// count adjustments from earlier epilogues, since they
+			// won't affect later PCs.
+			p.Spadj = int32(stacksize)
+
+		case AADDI:
+			// Refine Spadj to account for SP adjustments made via ADDI.
+			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST {
+				p.Spadj = int32(-p.From.Offset)
+			}
+		}
+
+		if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
+			f := cursym.Func()
+			if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
+				f.FuncFlag |= abi.FuncFlagSPWrite
+				if ctxt.Debugvlog || !ctxt.IsAsm {
+					ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
+					if !ctxt.IsAsm {
+						ctxt.Diag("invalid auto-SPWRITE in non-assembly")
+						ctxt.DiagFlush()
+						log.Fatalf("bad SPWRITE")
+					}
+				}
+			}
+		}
+	}
+
+	var callCount int
+	for p := cursym.Func().Text; p != nil; p = p.Link {
+		markRelocs(p)
+		if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
+			callCount++
+		}
+	}
+	const callTrampSize = 8 // 2 machine instructions.
+	maxTrampSize := int64(callCount * callTrampSize)
+
+	// Compute instruction addresses.  Once we do that, we need to check for
+	// overextended jumps and branches.  Within each iteration, Pc differences
+	// are always lower bounds (since the program gets monotonically longer,
+	// a fixed point will be reached).  No attempt to handle functions > 2GiB.
+	for {
+		big, rescan := false, false
+		maxPC := setPCs(cursym.Func().Text, 0)
+		if maxPC+maxTrampSize > (1 << 20) {
+			big = true
+		}
+
+		for p := cursym.Func().Text; p != nil; p = p.Link {
+			switch p.As {
+			case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
+				if p.To.Type != obj.TYPE_BRANCH {
+					panic("assemble: instruction with branch-like opcode lacks destination")
+				}
+				offset := p.To.Target().Pc - p.Pc
+				if offset < -4096 || 4096 <= offset {
+					// Branch is long.  Replace it with a jump.
+					jmp := obj.Appendp(p, newprog)
+					jmp.As = AJAL
+					jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
+					jmp.To = obj.Addr{Type: obj.TYPE_BRANCH}
+					jmp.To.SetTarget(p.To.Target())
+
+					p.As = InvertBranch(p.As)
+					p.To.SetTarget(jmp.Link)
+
+					// We may have made previous branches too long,
+					// so recheck them.
+					rescan = true
+				}
+			case AJAL:
+				// Linker will handle the intersymbol case and trampolines.
+				if p.To.Target() == nil {
+					if !big {
+						break
+					}
+					// This function is going to be too large for JALs
+					// to reach trampolines. Replace with AUIPC+JALR.
+					jmp := obj.Appendp(p, newprog)
+					jmp.As = AJALR
+					jmp.From = p.From
+					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
+
+					p.As = AAUIPC
+					p.Mark = (p.Mark &^ NEED_JAL_RELOC) | NEED_CALL_RELOC
+					p.AddRestSource(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym})
+					p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
+					p.Reg = obj.REG_NONE
+					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
+
+					rescan = true
+					break
+				}
+				offset := p.To.Target().Pc - p.Pc
+				if offset < -(1<<20) || (1<<20) <= offset {
+					// Replace with 2-instruction sequence. This assumes
+					// that TMP is not live across J instructions, since
+					// it is reserved by SSA.
+					jmp := obj.Appendp(p, newprog)
+					jmp.As = AJALR
+					jmp.From = p.From
+					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
+
+					// p.From is not generally valid, however will be
+					// fixed up in the next loop.
+					p.As = AAUIPC
+					p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym}
+					p.From.SetTarget(p.To.Target())
+					p.Reg = obj.REG_NONE
+					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
+
+					rescan = true
+				}
+			}
+		}
+
+		if !rescan {
+			break
+		}
+	}
+
+	// Now that there are no long branches, resolve branch and jump targets.
+	// At this point, instruction rewriting which changes the number of
+	// instructions will break everything--don't do it!
+	for p := cursym.Func().Text; p != nil; p = p.Link {
+		switch p.As {
+		case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
+			switch p.To.Type {
+			case obj.TYPE_BRANCH:
+				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
+			case obj.TYPE_MEM:
+				panic("unhandled type")
+			}
+
+		case AJAL:
+			// Linker will handle the intersymbol case and trampolines.
+			if p.To.Target() != nil {
+				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
+			}
+
+		case AAUIPC:
+			if p.From.Type == obj.TYPE_BRANCH {
+				low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc)
+				if err != nil {
+					ctxt.Diag("%v: jump displacement %d too large", p, p.From.Target().Pc-p.Pc) // target lives in From; To is REG_TMP and has no Target
+				}
+				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym}
+				p.Link.To.Offset = low
+			}
+
+		case obj.APCALIGN:
+			alignedValue := p.From.Offset
+			if (alignedValue&(alignedValue-1) != 0) || 4 > alignedValue || alignedValue > 2048 {
+				ctxt.Diag("alignment value of an instruction must be a power of two and in the range [4, 2048], got %d\n", alignedValue)
+			}
+			// Update the current text symbol alignment value.
+			if int32(alignedValue) > cursym.Func().Align {
+				cursym.Func().Align = int32(alignedValue)
+			}
+		}
+	}
+
+	// Validate all instructions - this provides nice error messages.
+	for p := cursym.Func().Text; p != nil; p = p.Link {
+		for _, ins := range instructionsForProg(p) {
+			ins.validate(ctxt)
+		}
+	}
+}
+
+func pcAlignPadLength(pc int64, alignedValue int64) int { // bytes of padding that bring pc up to the next multiple of alignedValue (a power of two)
+	return int((alignedValue - 1) & -pc)
+}
+
+func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog { // appends the stack-bound check and morestack call after p; returns the last Prog appended (p itself when framesize is 0)
+	// Leaf function with no frame is effectively NOSPLIT.
+	if framesize == 0 {
+		return p
+	}
+
+	if ctxt.Flag_maymorestack != "" {
+		// Save LR and REGCTXT
+		const frameSize = 16
+		p = ctxt.StartUnsafePoint(p, newprog)
+
+		// Spill Arguments. This has to happen before we open
+		// any more frame space.
+		p = cursym.Func().SpillRegisterArgs(p, newprog)
+
+		// MOV LR, -16(SP)
+		p = obj.Appendp(p, newprog)
+		p.As = AMOV
+		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -frameSize}
+		// ADDI $-16, SP
+		p = obj.Appendp(p, newprog)
+		p.As = AADDI
+		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -frameSize}
+		p.Reg = REG_SP
+		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
+		p.Spadj = frameSize
+		// MOV REGCTXT, 8(SP)
+		p = obj.Appendp(p, newprog)
+		p.As = AMOV
+		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
+		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
+
+		// CALL maymorestack
+		p = obj.Appendp(p, newprog)
+		p.As = obj.ACALL
+		p.To.Type = obj.TYPE_BRANCH
+		// See ../x86/obj6.go
+		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())
+		jalToSym(ctxt, p, REG_X5)
+
+		// Restore LR and REGCTXT
+
+		// MOV 8(SP), REGCTXT
+		p = obj.Appendp(p, newprog)
+		p.As = AMOV
+		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
+		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
+		// MOV (SP), LR
+		p = obj.Appendp(p, newprog)
+		p.As = AMOV
+		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
+		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+		// ADDI $16, SP
+		p = obj.Appendp(p, newprog)
+		p.As = AADDI
+		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: frameSize}
+		p.Reg = REG_SP
+		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
+		p.Spadj = -frameSize
+
+		// Unspill arguments
+		p = cursym.Func().UnspillRegisterArgs(p, newprog)
+		p = ctxt.EndUnsafePoint(p, newprog, -1)
+	}
+
+	// Jump back to here after morestack returns.
+	startPred := p
+
+	// MOV	g_stackguard(g), X6
+	p = obj.Appendp(p, newprog)
+	p.As = AMOV
+	p.From.Type = obj.TYPE_MEM
+	p.From.Reg = REGG
+	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
+	if cursym.CFunc() {
+		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
+	}
+	p.To.Type = obj.TYPE_REG
+	p.To.Reg = REG_X6
+
+	// Mark the stack bound check and morestack call async nonpreemptible.
+	// If we get preempted here, when resumed the preemption request is
+	// cleared, but we'll still call morestack, which will double the stack
+	// unnecessarily. See issue #35470.
+	p = ctxt.StartUnsafePoint(p, newprog)
+
+	var to_done, to_more *obj.Prog
+
+	if framesize <= abi.StackSmall {
+		// small stack
+		//	// if SP > stackguard { goto done }
+		//	BLTU	stackguard, SP, done
+		p = obj.Appendp(p, newprog)
+		p.As = ABLTU
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X6
+		p.Reg = REG_SP
+		p.To.Type = obj.TYPE_BRANCH
+		to_done = p
+	} else {
+		// large stack: SP-framesize < stackguard-StackSmall
+		offset := int64(framesize) - abi.StackSmall
+		if framesize > abi.StackBig {
+			// Such a large stack we need to protect against underflow.
+			// The runtime guarantees SP > abi.StackBig, but
+			// framesize is large enough that SP-framesize may
+			// underflow, causing a direct comparison with the
+			// stack guard to incorrectly succeed. We explicitly
+			// guard against underflow.
+			//
+			//	MOV	$(framesize-StackSmall), X7
+			//	BLTU	SP, X7, label-of-call-to-morestack
+
+			p = obj.Appendp(p, newprog)
+			p.As = AMOV
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = offset
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = REG_X7
+
+			p = obj.Appendp(p, newprog)
+			p.As = ABLTU
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = REG_SP
+			p.Reg = REG_X7
+			p.To.Type = obj.TYPE_BRANCH
+			to_more = p
+		}
+
+		// Check against the stack guard. We've ensured this won't underflow.
+		//	ADD	$-(framesize-StackSmall), SP, X7
+		//	// if X7 > stackguard { goto done }
+		//	BLTU	stackguard, X7, done
+		p = obj.Appendp(p, newprog)
+		p.As = AADDI
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = -offset
+		p.Reg = REG_SP
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = REG_X7
+
+		p = obj.Appendp(p, newprog)
+		p.As = ABLTU
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X6
+		p.Reg = REG_X7
+		p.To.Type = obj.TYPE_BRANCH
+		to_done = p
+	}
+
+	// Spill the register args that could be clobbered by the
+	// morestack code
+	p = ctxt.EmitEntryStackMap(cursym, p, newprog)
+	p = cursym.Func().SpillRegisterArgs(p, newprog)
+
+	// CALL runtime.morestack(SB)
+	p = obj.Appendp(p, newprog)
+	p.As = obj.ACALL
+	p.To.Type = obj.TYPE_BRANCH
+
+	if cursym.CFunc() {
+		p.To.Sym = ctxt.Lookup("runtime.morestackc")
+	} else if !cursym.Func().Text.From.Sym.NeedCtxt() {
+		p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt")
+	} else {
+		p.To.Sym = ctxt.Lookup("runtime.morestack")
+	}
+	if to_more != nil {
+		to_more.To.SetTarget(p)
+	}
+	jalToSym(ctxt, p, REG_X5)
+
+	// The instructions which unspill regs should be preemptible.
+	p = ctxt.EndUnsafePoint(p, newprog, -1)
+	p = cursym.Func().UnspillRegisterArgs(p, newprog)
+
+	// JMP start
+	p = obj.Appendp(p, newprog)
+	p.As = AJAL
+	p.To = obj.Addr{Type: obj.TYPE_BRANCH}
+	p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
+	p.To.SetTarget(startPred.Link)
+
+	// Placeholder that serves as to_done's jump target; it is also the Prog returned to the caller.
+	p = obj.Appendp(p, newprog)
+	p.As = obj.ANOP // zero-width place holder
+	to_done.To.SetTarget(p)
+
+	return p
+}
+
+// signExtend treats the low bit bits of val as a two's complement number
+// and returns that number widened to 64 bits.
+func signExtend(val int64, bit uint) int64 {
+	shift := 64 - bit
+	return (val << shift) >> shift
+}
+
+// Split32BitImmediate breaks a signed 32-bit immediate into a signed 20-bit
+// upper part and a signed 12-bit lower part that is meant to be added to the
+// upper result. For instance, high can be given to LUI and low to a following
+// ADDI to build the full 32-bit constant. An error is returned when imm does
+// not fit in 32 bits. An immediate that already fits in 12 bits is returned
+// unchanged as low, with high set to zero.
+func Split32BitImmediate(imm int64) (low, high int64, err error) {
+	if err := immIFits(imm, 32); err != nil {
+		return 0, 0, err
+	}
+	if immIFits(imm, 12) == nil {
+		// A single 12-bit immediate suffices; there is no upper part.
+		return imm, 0, nil
+	}
+
+	upper := imm >> 12
+	if imm&(1<<11) != 0 {
+		// The low 12 bits are consumed as a signed value. With bit 11 set
+		// that value is negative, so the upper part is bumped by one to
+		// compensate for the borrow.
+		//
+		// The bump cannot carry out of an all-ones upper field: if the
+		// top 20 bits were all set together with bit 11, imm would fit in
+		// 12 signed bits and would already have been returned above.
+		upper++
+	}
+
+	return signExtend(imm, 12), signExtend(upper, 20), nil
+}
+
+// regVal returns r as an offset from min. It panics if r lies outside the
+// inclusive range [min, max].
+func regVal(r, min, max uint32) uint32 {
+	if min <= r && r <= max {
+		return r - min
+	}
+	panic(fmt.Sprintf("register out of range, want %d <= %d <= %d", min, r, max))
+}
+
+// regI returns the number of integer register r relative to REG_X0.
+// It panics (via regVal) if r is not in the range REG_X0 through REG_X31.
+func regI(r uint32) uint32 {
+	return regVal(r, REG_X0, REG_X31)
+}
+
+// regF returns the number of float register r relative to REG_F0.
+// It panics (via regVal) if r is not in the range REG_F0 through REG_F31.
+func regF(r uint32) uint32 {
+	return regVal(r, REG_F0, REG_F31)
+}
+
+// regAddr returns the register held in a as an offset from min. It panics
+// if a is not a register operand, or (via regVal) if the register is outside
+// the inclusive range [min, max].
+func regAddr(a obj.Addr, min, max uint32) uint32 {
+	if a.Type == obj.TYPE_REG {
+		return regVal(uint32(a.Reg), min, max)
+	}
+	panic(fmt.Sprintf("ill typed: %+v", a))
+}
+
+// regIAddr extracts the integer register from an Addr, returning its number
+// relative to REG_X0. It panics (via regAddr) if a is not a register operand
+// or its register is outside REG_X0 through REG_X31.
+func regIAddr(a obj.Addr) uint32 {
+	return regAddr(a, REG_X0, REG_X31)
+}
+
+// regFAddr extracts the float register from an Addr, returning its number
+// relative to REG_F0. It panics (via regAddr) if a is not a register operand
+// or its register is outside REG_F0 through REG_F31.
+func regFAddr(a obj.Addr) uint32 {
+	return regAddr(a, REG_F0, REG_F31)
+}
+
+// immEven reports, as an error, an immediate whose lowest bit is set, i.e.
+// one that is not a multiple of two. It returns nil for even values.
+func immEven(x int64) error {
+	if x&1 == 0 {
+		return nil
+	}
+	return fmt.Errorf("immediate %#x is not a multiple of two", x)
+}
+
+// immIFits checks whether the immediate value x fits in nbits bits
+// as a signed integer. If it does not, an error is returned that names the
+// permitted range and the requested width (nbits). Narrow fields are
+// reported in decimal and wider ones in hexadecimal.
+func immIFits(x int64, nbits uint) error {
+	// A signed nbits-bit field has one sign bit and nbits-1 magnitude bits.
+	magBits := nbits - 1
+	min := int64(-1) << magBits
+	max := int64(1)<<magBits - 1
+	if x < min || x > max {
+		// Report the width the caller asked for, not the magnitude width,
+		// so the message agrees with the range printed next to it.
+		if magBits <= 16 {
+			return fmt.Errorf("signed immediate %d must be in range [%d, %d] (%d bits)", x, min, max, nbits)
+		}
+		return fmt.Errorf("signed immediate %#x must be in range [%#x, %#x] (%d bits)", x, min, max, nbits)
+	}
+	return nil
+}
+
+// immI returns imm truncated to uint32 after checking that it fits in a
+// signed field of nbits bits. It panics, naming as, when it does not fit.
+func immI(as obj.As, imm int64, nbits uint) uint32 {
+	err := immIFits(imm, nbits)
+	if err != nil {
+		panic(fmt.Sprintf("%v: %v", as, err))
+	}
+	return uint32(imm)
+}
+
+// wantImmI reports a diagnostic on ctxt for ins when imm does not fit in a
+// signed field of nbits bits.
+func wantImmI(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
+	err := immIFits(imm, nbits)
+	if err == nil {
+		return
+	}
+	ctxt.Diag("%v: %v", ins, err)
+}
+
+func wantReg(ctxt *obj.Link, ins *instruction, pos string, descr string, r, min, max uint32) {
+	if r < min || r > max {
+		var suffix string
+		if r != obj.REG_NONE {
+			suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r)))
+		}
+		ctxt.Diag("%v: expected %s register in %s position%s", ins, descr, pos, suffix)
+	}
+}
+
+// wantNoneReg reports a diagnostic on ctxt for ins when the operand position
+// pos holds a register (r is not obj.REG_NONE) although none is expected.
+func wantNoneReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
+	if r == obj.REG_NONE {
+		return
+	}
+	ctxt.Diag("%v: expected no register in %s but got register %s", ins, pos, RegName(int(r)))
+}
+
+// wantIntReg checks that r is an integer register (REG_X0 through REG_X31).
+// If it is not, a diagnostic naming the operand position pos is reported on
+// ctxt via wantReg.
+func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
+	wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
+}
+
+// wantFloatReg checks that r is a floating-point register (REG_F0 through
+// REG_F31). If it is not, a diagnostic naming the operand position pos is
+// reported on ctxt via wantReg.
+func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
+	wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
+}
+
+// wantEvenOffset reports a diagnostic on ctxt for ins when offset is not a
+// multiple of two.
+func wantEvenOffset(ctxt *obj.Link, ins *instruction, offset int64) {
+	err := immEven(offset)
+	if err == nil {
+		return
+	}
+	ctxt.Diag("%v: %v", ins, err)
+}
+
+// validateRIII checks the operands of an instruction whose rd, rs1 and rs2
+// must all be integer registers and whose rs3 must be unset. Problems are
+// reported as diagnostics on ctxt.
+func validateRIII(ctxt *obj.Link, ins *instruction) {
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantIntReg(ctxt, ins, "rs1", ins.rs1)
+	wantIntReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRFFF checks the operands of an instruction whose rd, rs1 and rs2
+// must all be float registers and whose rs3 must be unset. Problems are
+// reported as diagnostics on ctxt.
+func validateRFFF(ctxt *obj.Link, ins *instruction) {
+	wantFloatReg(ctxt, ins, "rd", ins.rd)
+	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
+	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRFFFF checks the operands of an instruction whose rd, rs1, rs2 and
+// rs3 must all be float registers. Problems are reported as diagnostics on
+// ctxt.
+func validateRFFFF(ctxt *obj.Link, ins *instruction) {
+	wantFloatReg(ctxt, ins, "rd", ins.rd)
+	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
+	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
+	wantFloatReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRFFI checks the operands of an instruction with an integer rd and
+// float rs1 and rs2; rs3 must be unset. Problems are reported as diagnostics
+// on ctxt.
+func validateRFFI(ctxt *obj.Link, ins *instruction) {
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
+	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRFI checks the operands of an instruction with an integer rd and a
+// single float source held in rs2; rs1 and rs3 must be unset. Problems are
+// reported as diagnostics on ctxt.
+func validateRFI(ctxt *obj.Link, ins *instruction) {
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
+	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRIF checks the operands of an instruction with a float rd and a
+// single integer source held in rs2; rs1 and rs3 must be unset. Problems are
+// reported as diagnostics on ctxt.
+func validateRIF(ctxt *obj.Link, ins *instruction) {
+	wantFloatReg(ctxt, ins, "rd", ins.rd)
+	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
+	wantIntReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRFF checks the operands of an instruction with a float rd and a
+// single float source held in rs2; rs1 and rs3 must be unset. Problems are
+// reported as diagnostics on ctxt.
+func validateRFF(ctxt *obj.Link, ins *instruction) {
+	wantFloatReg(ctxt, ins, "rd", ins.rd)
+	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
+	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateII checks the operands of an instruction that takes a signed
+// 12-bit immediate, an integer rd and an integer rs1; rs2 and rs3 must be
+// unset. Problems are reported as diagnostics on ctxt.
+func validateII(ctxt *obj.Link, ins *instruction) {
+	wantImmI(ctxt, ins, ins.imm, 12)
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantIntReg(ctxt, ins, "rs1", ins.rs1)
+	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateIF checks the operands of an instruction that takes a signed
+// 12-bit immediate, a float rd and an integer rs1; rs2 and rs3 must be
+// unset. Problems are reported as diagnostics on ctxt.
+func validateIF(ctxt *obj.Link, ins *instruction) {
+	wantImmI(ctxt, ins, ins.imm, 12)
+	wantFloatReg(ctxt, ins, "rd", ins.rd)
+	wantIntReg(ctxt, ins, "rs1", ins.rs1)
+	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateSI checks the operands of an instruction that takes a signed
+// 12-bit immediate and integer registers in both ins.rd and ins.rs1; rs2 and
+// rs3 must be unset. Problems are reported as diagnostics on ctxt.
+func validateSI(ctxt *obj.Link, ins *instruction) {
+	wantImmI(ctxt, ins, ins.imm, 12)
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantIntReg(ctxt, ins, "rs1", ins.rs1)
+	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateSF checks the operands of an instruction that takes a signed
+// 12-bit immediate, an integer register in ins.rd and a float register in
+// ins.rs1; rs2 and rs3 must be unset. Problems are reported as diagnostics
+// on ctxt.
+func validateSF(ctxt *obj.Link, ins *instruction) {
+	wantImmI(ctxt, ins, ins.imm, 12)
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
+	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateB checks the operands of a branch-style instruction: an even,
+// signed 13-bit offset, integer rs1 and rs2, and no rd or rs3. Problems are
+// reported as diagnostics on ctxt.
+func validateB(ctxt *obj.Link, ins *instruction) {
+	// Offsets are multiples of two, so accept 13 bit immediates for the
+	// 12 bit slot. We implicitly drop the least significant bit in encodeB.
+	wantEvenOffset(ctxt, ins, ins.imm)
+	wantImmI(ctxt, ins, ins.imm, 13)
+	wantNoneReg(ctxt, ins, "rd", ins.rd)
+	wantIntReg(ctxt, ins, "rs1", ins.rs1)
+	wantIntReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateU checks the operands of an instruction that takes a signed
+// 20-bit immediate and an integer rd; rs1, rs2 and rs3 must be unset.
+// Problems are reported as diagnostics on ctxt.
+func validateU(ctxt *obj.Link, ins *instruction) {
+	wantImmI(ctxt, ins, ins.imm, 20)
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
+	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateJ checks the operands of a jump-style instruction: an even, signed
+// 21-bit offset and an integer rd; rs1, rs2 and rs3 must be unset. Problems
+// are reported as diagnostics on ctxt.
+func validateJ(ctxt *obj.Link, ins *instruction) {
+	// Offsets are multiples of two, so accept 21 bit immediates for the
+	// 20 bit slot. We implicitly drop the least significant bit in encodeJ.
+	wantEvenOffset(ctxt, ins, ins.imm)
+	wantImmI(ctxt, ins, ins.imm, 21)
+	wantIntReg(ctxt, ins, "rd", ins.rd)
+	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
+	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
+	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
+}
+
+// validateRaw checks that the immediate of a raw instruction word is a
+// non-negative value below 1<<32, reporting a diagnostic on ctxt otherwise.
+func validateRaw(ctxt *obj.Link, ins *instruction) {
+	// The raw value is handled as an unsigned 32-bit integer; negative
+	// machine code is not accepted.
+	if 0 <= ins.imm && ins.imm < 1<<32 {
+		return
+	}
+	ctxt.Diag("%v: immediate %d in raw position cannot be larger than 32 bits", ins.as, ins.imm)
+}
+
+// extractBitAndShift picks out bit number bit of imm and returns it moved
+// to bit position pos; all other bits of the result are zero.
+func extractBitAndShift(imm uint32, bit, pos int) uint32 {
+	b := (imm >> bit) & 1
+	return b << pos
+}
+
+// encodeR builds the 32-bit word for an R-type RISC-V instruction. The fixed
+// opcode, funct3, funct7 and rs2 bits come from the entry returned by
+// encode(as); the supplied register numbers and any additional funct3 and
+// funct7 bits are ORed in on top. It panics if as has no entry, or if the
+// entry already fixes rs2 bits and a non-zero rs2 is supplied as well.
+func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 {
+	enc := encode(as)
+	switch {
+	case enc == nil:
+		panic("encodeR: could not encode instruction")
+	case enc.rs2 != 0 && rs2 != 0:
+		panic("encodeR: instruction uses rs2, but rs2 was nonzero")
+	}
+
+	word := enc.opcode
+	word |= rd << 7
+	word |= (enc.funct3 | funct3) << 12
+	word |= rs1 << 15
+	word |= (enc.rs2 | rs2) << 20
+	word |= (enc.funct7 | funct7) << 25
+	return word
+}
+
+// encodeR4 builds the 32-bit word for an R4-type RISC-V instruction, which
+// carries a third source register rs3 and a 2-bit funct2 field in place of
+// funct7. The entry's funct7 bits are merged into funct2. It panics if as
+// has no entry, if the entry fixes rs2 bits, or if the merged funct2 does
+// not fit in two bits.
+func encodeR4(as obj.As, rs1, rs2, rs3, rd, funct3, funct2 uint32) uint32 {
+	enc := encode(as)
+	if enc == nil {
+		panic("encodeR4: could not encode instruction")
+	}
+	if enc.rs2 != 0 {
+		panic("encodeR4: instruction uses rs2")
+	}
+	f2 := funct2 | enc.funct7
+	if f2&^3 != 0 {
+		panic("encodeR4: funct2 requires more than 2 bits")
+	}
+
+	word := enc.opcode
+	word |= rd << 7
+	word |= (enc.funct3 | funct3) << 12
+	word |= rs1 << 15
+	word |= rs2 << 20
+	word |= f2 << 25
+	word |= rs3 << 27
+	return word
+}
+
+// encodeRIII encodes an R-type instruction whose rs1, rs2 and rd are all
+// integer registers. It panics (via regI) if any of them is not.
+func encodeRIII(ins *instruction) uint32 {
+	return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeRFFF encodes an R-type instruction whose rs1, rs2 and rd are all
+// float registers. It panics (via regF) if any of them is not.
+func encodeRFFF(ins *instruction) uint32 {
+	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeRFFFF encodes an R4-type instruction whose rs1, rs2, rs3 and rd are
+// all float registers. ins.funct7 is passed to encodeR4 as its funct2
+// argument. It panics (via regF) if any register is not a float register.
+func encodeRFFFF(ins *instruction) uint32 {
+	return encodeR4(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rs3), regF(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeRFFI encodes an R-type instruction with float rs1 and rs2 and an
+// integer rd. It panics (via regF/regI) on a register of the wrong class.
+func encodeRFFI(ins *instruction) uint32 {
+	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeRFI encodes an R-type instruction with one float source and an
+// integer rd. The source is taken from ins.rs2 and placed in the rs1 field;
+// the rs2 argument to encodeR is passed as zero. It panics (via regF/regI)
+// on a register of the wrong class.
+func encodeRFI(ins *instruction) uint32 {
+	return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeRIF encodes an R-type instruction with one integer source and a
+// float rd. The source is taken from ins.rs2 and placed in the rs1 field;
+// the rs2 argument to encodeR is passed as zero. It panics (via regI/regF)
+// on a register of the wrong class.
+func encodeRIF(ins *instruction) uint32 {
+	return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeRFF encodes an R-type instruction with one float source and a float
+// rd. The source is taken from ins.rs2 and placed in the rs1 field; the rs2
+// argument to encodeR is passed as zero. It panics (via regF) if either
+// register is not a float register.
+func encodeRFF(ins *instruction) uint32 {
+	return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
+}
+
+// encodeI builds the 32-bit word for an I-type RISC-V instruction. The
+// opcode and funct3 come from the entry returned by encode(as), and the
+// entry's csr value is ORed into the immediate before it is placed at bit
+// 20. It panics if as has no entry.
+func encodeI(as obj.As, rs1, rd, imm uint32) uint32 {
+	enc := encode(as)
+	if enc == nil {
+		panic("encodeI: could not encode instruction")
+	}
+	imm12 := imm | uint32(enc.csr)
+	return enc.opcode | rd<<7 | enc.funct3<<12 | rs1<<15 | imm12<<20
+}
+
+// encodeII encodes an I-type instruction with integer rs1 and rd, using
+// ins.imm truncated to 32 bits as the immediate. It panics (via regI) if
+// either register is not an integer register.
+func encodeII(ins *instruction) uint32 {
+	return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm))
+}
+
+// encodeIF encodes an I-type instruction with an integer rs1 and a float rd,
+// using ins.imm truncated to 32 bits as the immediate. It panics (via
+// regI/regF) on a register of the wrong class.
+func encodeIF(ins *instruction) uint32 {
+	return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm))
+}
+
+// encodeS builds the 32-bit word for an S-type RISC-V instruction. The
+// immediate is split: its low five bits go to bit 7 and the remaining upper
+// bits to bit 25. The opcode and funct3 come from the entry returned by
+// encode(as). It panics if as has no entry.
+func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 {
+	enc := encode(as)
+	if enc == nil {
+		panic("encodeS: could not encode instruction")
+	}
+	immHi := imm >> 5
+	immLo := imm & 0x1f
+	return enc.opcode | immLo<<7 | enc.funct3<<12 | rs1<<15 | rs2<<20 | immHi<<25
+}
+
+// encodeSI encodes an S-type instruction with two integer registers.
+// ins.rd supplies the register for the rs1 field and ins.rs1 the register
+// for the rs2 field. It panics (via regI) if either is not an integer
+// register.
+func encodeSI(ins *instruction) uint32 {
+	return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm))
+}
+
+// encodeSF encodes an S-type instruction with an integer register in the
+// rs1 field (taken from ins.rd) and a float register in the rs2 field
+// (taken from ins.rs1). It panics (via regI/regF) on a register of the
+// wrong class.
+func encodeSF(ins *instruction) uint32 {
+	return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm))
+}
+
+// encodeBImmediate scatters a branch offset into the immediate bit positions
+// of a B-type RISC-V instruction word: imm[12] to bit 31, imm[10:5] to bits
+// 30:25, imm[4:1] to bits 11:8 and imm[11] to bit 7. Bit 0 of imm is dropped.
+func encodeBImmediate(imm uint32) uint32 {
+	bit12 := (imm >> 12) << 31
+	bits10to5 := ((imm >> 5) & 0x3f) << 25
+	bits4to1 := ((imm >> 1) & 0xf) << 8
+	bit11 := ((imm >> 11) & 0x1) << 7
+	return bit12 | bits10to5 | bits4to1 | bit11
+}
+
+// encodeB builds the 32-bit word for a B-type RISC-V instruction. ins.imm
+// must fit in 13 signed bits. Note the operand mapping: ins.rs1 is placed in
+// the rs2 field and ins.rs2 in the rs1 field. It panics if the immediate
+// does not fit, if either register is not an integer register, or if ins.as
+// has no entry in encode.
+func encodeB(ins *instruction) uint32 {
+	imm := immI(ins.as, ins.imm, 13)
+	rs2Field := regI(ins.rs1)
+	rs1Field := regI(ins.rs2)
+	enc := encode(ins.as)
+	if enc == nil {
+		panic("encodeB: could not encode instruction")
+	}
+	return enc.opcode | enc.funct3<<12 | rs1Field<<15 | rs2Field<<20 | encodeBImmediate(imm)
+}
+
+// encodeU builds the 32-bit word for a U-type RISC-V instruction.
+//
+// The immediate supplied in ins.imm is only the upper 20 bits of the 32-bit
+// value: callers pass the top bits directly instead of a 32-bit constant
+// whose low 12 bits would all have to be zero. It panics if the immediate
+// does not fit in 20 signed bits, if rd is not an integer register, or if
+// ins.as has no entry in encode.
+func encodeU(ins *instruction) uint32 {
+	upper := immI(ins.as, ins.imm, 20)
+	rd := regI(ins.rd)
+	enc := encode(ins.as)
+	if enc == nil {
+		panic("encodeU: could not encode instruction")
+	}
+	return enc.opcode | rd<<7 | upper<<12
+}
+
+// encodeJImmediate scatters a jump offset into the immediate bit positions
+// of a J-type RISC-V instruction word: imm[20] to bit 31, imm[10:1] to bits
+// 30:21, imm[11] to bit 20 and imm[19:12] to bits 19:12. Bit 0 of imm is
+// dropped.
+func encodeJImmediate(imm uint32) uint32 {
+	bit20 := (imm >> 20) << 31
+	bits10to1 := ((imm >> 1) & 0x3ff) << 21
+	bit11 := ((imm >> 11) & 0x1) << 20
+	bits19to12 := ((imm >> 12) & 0xff) << 12
+	return bit20 | bits10to1 | bit11 | bits19to12
+}
+
+// encodeJ builds the 32-bit word for a J-type RISC-V instruction. It panics
+// if ins.imm does not fit in 21 signed bits, if rd is not an integer
+// register, or if ins.as has no entry in encode.
+func encodeJ(ins *instruction) uint32 {
+	offset := immI(ins.as, ins.imm, 21)
+	rd := regI(ins.rd)
+	enc := encode(ins.as)
+	if enc == nil {
+		panic("encodeJ: could not encode instruction")
+	}
+	return enc.opcode | rd<<7 | encodeJImmediate(offset)
+}
+
+// encodeCBImmediate encodes an immediate for a CB-type RISC-V instruction.
+func encodeCBImmediate(imm uint32) uint32 {
+	// Bit order - [8|4:3|7:6|2:1|5]
+	bits := extractBitAndShift(imm, 8, 7)
+	bits |= extractBitAndShift(imm, 4, 6)
+	bits |= extractBitAndShift(imm, 3, 5)
+	bits |= extractBitAndShift(imm, 7, 4)
+	bits |= extractBitAndShift(imm, 6, 3)
+	bits |= extractBitAndShift(imm, 2, 2)
+	bits |= extractBitAndShift(imm, 1, 1)
+	bits |= extractBitAndShift(imm, 5, 0)
+	return (bits>>5)<<10 | (bits&0x1f)<<2
+}
+
+// encodeCJImmediate encodes an immediate for a CJ-type RISC-V instruction.
+func encodeCJImmediate(imm uint32) uint32 {
+	// Bit order - [11|4|9:8|10|6|7|3:1|5]
+	bits := extractBitAndShift(imm, 11, 10)
+	bits |= extractBitAndShift(imm, 4, 9)
+	bits |= extractBitAndShift(imm, 9, 8)
+	bits |= extractBitAndShift(imm, 8, 7)
+	bits |= extractBitAndShift(imm, 10, 6)
+	bits |= extractBitAndShift(imm, 6, 5)
+	bits |= extractBitAndShift(imm, 7, 4)
+	bits |= extractBitAndShift(imm, 3, 3)
+	bits |= extractBitAndShift(imm, 2, 2)
+	bits |= extractBitAndShift(imm, 1, 1)
+	bits |= extractBitAndShift(imm, 5, 0)
+	return bits << 2
+}
+
+// encodeRawIns returns ins.imm as the instruction word itself. The value is
+// handled as an unsigned 32-bit integer; it panics if ins.imm is negative or
+// does not fit in 32 bits.
+func encodeRawIns(ins *instruction) uint32 {
+	if 0 <= ins.imm && ins.imm < 1<<32 {
+		return uint32(ins.imm)
+	}
+	panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm))
+}
+
+// EncodeBImmediate returns imm laid out in the immediate bit positions of a
+// B-type instruction word. An error is returned if imm does not fit in 13
+// signed bits or is not a multiple of two.
+func EncodeBImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 13)
+	if err == nil {
+		err = immEven(imm)
+	}
+	if err != nil {
+		return 0, err
+	}
+	return int64(encodeBImmediate(uint32(imm))), nil
+}
+
+// EncodeCBImmediate returns imm laid out in the immediate bit positions of
+// a CB-type instruction. An error is returned if imm does not fit in 9
+// signed bits or is not a multiple of two.
+func EncodeCBImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 9)
+	if err == nil {
+		err = immEven(imm)
+	}
+	if err != nil {
+		return 0, err
+	}
+	return int64(encodeCBImmediate(uint32(imm))), nil
+}
+
+// EncodeCJImmediate returns imm laid out in the immediate bit positions of
+// a CJ-type instruction. An error is returned if imm does not fit in 12
+// signed bits or is not a multiple of two.
+func EncodeCJImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 12)
+	if err == nil {
+		err = immEven(imm)
+	}
+	if err != nil {
+		return 0, err
+	}
+	return int64(encodeCJImmediate(uint32(imm))), nil
+}
+
+// EncodeIImmediate returns imm shifted left by 20 bits, the position used
+// for the immediate of an I-type instruction. An error is returned if imm
+// does not fit in 12 signed bits.
+func EncodeIImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 12)
+	if err != nil {
+		return 0, err
+	}
+	return imm << 20, nil
+}
+
+// EncodeJImmediate returns imm laid out in the immediate bit positions of a
+// J-type instruction word. An error is returned if imm does not fit in 21
+// signed bits or is not a multiple of two.
+func EncodeJImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 21)
+	if err == nil {
+		err = immEven(imm)
+	}
+	if err != nil {
+		return 0, err
+	}
+	return int64(encodeJImmediate(uint32(imm))), nil
+}
+
+// EncodeSImmediate returns imm split the way an S-type instruction stores
+// its immediate: the low five bits shifted to bit 7 and the remaining upper
+// bits shifted to bit 25. An error is returned if imm does not fit in 12
+// signed bits.
+func EncodeSImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 12)
+	if err != nil {
+		return 0, err
+	}
+	upper := (imm >> 5) << 25
+	lower := (imm & 0x1f) << 7
+	return upper | lower, nil
+}
+
+// EncodeUImmediate returns imm shifted left by 12 bits, the position used
+// for the immediate of a U-type instruction. An error is returned if imm
+// does not fit in 20 signed bits.
+func EncodeUImmediate(imm int64) (int64, error) {
+	err := immIFits(imm, 20)
+	if err != nil {
+		return 0, err
+	}
+	return imm << 12, nil
+}
+
+// encoding describes how one class of instruction is validated and turned
+// into machine code.
+type encoding struct {
+	encode   func(*instruction) uint32     // encode returns the machine code for an instruction
+	validate func(*obj.Link, *instruction) // validate validates an instruction
+	length   int                           // length of encoded instruction; 0 for pseudo-ops, 4 otherwise
+}
+
+var (
+	// Encodings have the following naming convention:
+	//
+	//  1. the instruction encoding (R/I/S/B/U/J), in lowercase
+	//  2. zero or more register operand identifiers (I = integer
+	//     register, F = float register), in uppercase
+	//  3. the word "Encoding"
+	//
+	// For example, rIIIEncoding indicates an R-type instruction with two
+	// integer register inputs and an integer register output; sFEncoding
+	// indicates an S-type instruction with rs2 being a float register.
+
+	rIIIEncoding  = encoding{encode: encodeRIII, validate: validateRIII, length: 4}
+	rFFFEncoding  = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4}
+	rFFFFEncoding = encoding{encode: encodeRFFFF, validate: validateRFFFF, length: 4}
+	rFFIEncoding  = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4}
+	rFIEncoding   = encoding{encode: encodeRFI, validate: validateRFI, length: 4}
+	rIFEncoding   = encoding{encode: encodeRIF, validate: validateRIF, length: 4}
+	rFFEncoding   = encoding{encode: encodeRFF, validate: validateRFF, length: 4}
+
+	iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4}
+	iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4}
+
+	sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4}
+	sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4}
+
+	bEncoding = encoding{encode: encodeB, validate: validateB, length: 4}
+	uEncoding = encoding{encode: encodeU, validate: validateU, length: 4}
+	jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4}
+
+	// rawEncoding encodes a raw instruction byte sequence: the immediate
+	// is emitted unchanged as the 32-bit instruction word.
+	rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4}
+
+	// pseudoOpEncoding has no encode function, so calling encode on it
+	// would panic on the nil function value; it does no validation. Its
+	// zero length marks it as a pseudo-op.
+	pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0}
+
+	// badEncoding is used when an invalid op is encountered.
+	// An error has already been generated, so let anything else through:
+	// it encodes to 0 and validates everything.
+	badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0}
+)
+
+// encodings contains the encodings for RISC-V instructions.
+// Instructions are masked with obj.AMask to keep indices small.
+// Opcodes that are not listed keep the zero encoding value, whose nil
+// validate function makes encodingForAs report that no encoding exists.
+var encodings = [ALAST & obj.AMask]encoding{
+
+	// Unprivileged ISA
+
+	// 2.4: Integer Computational Instructions
+	AADDI & obj.AMask:  iIEncoding,
+	ASLTI & obj.AMask:  iIEncoding,
+	ASLTIU & obj.AMask: iIEncoding,
+	AANDI & obj.AMask:  iIEncoding,
+	AORI & obj.AMask:   iIEncoding,
+	AXORI & obj.AMask:  iIEncoding,
+	ASLLI & obj.AMask:  iIEncoding,
+	ASRLI & obj.AMask:  iIEncoding,
+	ASRAI & obj.AMask:  iIEncoding,
+	ALUI & obj.AMask:   uEncoding,
+	AAUIPC & obj.AMask: uEncoding,
+	AADD & obj.AMask:   rIIIEncoding,
+	ASLT & obj.AMask:   rIIIEncoding,
+	ASLTU & obj.AMask:  rIIIEncoding,
+	AAND & obj.AMask:   rIIIEncoding,
+	AOR & obj.AMask:    rIIIEncoding,
+	AXOR & obj.AMask:   rIIIEncoding,
+	ASLL & obj.AMask:   rIIIEncoding,
+	ASRL & obj.AMask:   rIIIEncoding,
+	ASUB & obj.AMask:   rIIIEncoding,
+	ASRA & obj.AMask:   rIIIEncoding,
+
+	// 2.5: Control Transfer Instructions
+	AJAL & obj.AMask:  jEncoding,
+	AJALR & obj.AMask: iIEncoding,
+	ABEQ & obj.AMask:  bEncoding,
+	ABNE & obj.AMask:  bEncoding,
+	ABLT & obj.AMask:  bEncoding,
+	ABLTU & obj.AMask: bEncoding,
+	ABGE & obj.AMask:  bEncoding,
+	ABGEU & obj.AMask: bEncoding,
+
+	// 2.6: Load and Store Instructions
+	ALW & obj.AMask:  iIEncoding,
+	ALWU & obj.AMask: iIEncoding,
+	ALH & obj.AMask:  iIEncoding,
+	ALHU & obj.AMask: iIEncoding,
+	ALB & obj.AMask:  iIEncoding,
+	ALBU & obj.AMask: iIEncoding,
+	ASW & obj.AMask:  sIEncoding,
+	ASH & obj.AMask:  sIEncoding,
+	ASB & obj.AMask:  sIEncoding,
+
+	// 2.7: Memory Ordering
+	AFENCE & obj.AMask: iIEncoding,
+
+	// 5.2: Integer Computational Instructions (RV64I)
+	AADDIW & obj.AMask: iIEncoding,
+	ASLLIW & obj.AMask: iIEncoding,
+	ASRLIW & obj.AMask: iIEncoding,
+	ASRAIW & obj.AMask: iIEncoding,
+	AADDW & obj.AMask:  rIIIEncoding,
+	ASLLW & obj.AMask:  rIIIEncoding,
+	ASRLW & obj.AMask:  rIIIEncoding,
+	ASUBW & obj.AMask:  rIIIEncoding,
+	ASRAW & obj.AMask:  rIIIEncoding,
+
+	// 5.3: Load and Store Instructions (RV64I)
+	ALD & obj.AMask: iIEncoding,
+	ASD & obj.AMask: sIEncoding,
+
+	// 7.1: Multiplication Operations
+	AMUL & obj.AMask:    rIIIEncoding,
+	AMULH & obj.AMask:   rIIIEncoding,
+	AMULHU & obj.AMask:  rIIIEncoding,
+	AMULHSU & obj.AMask: rIIIEncoding,
+	AMULW & obj.AMask:   rIIIEncoding,
+	ADIV & obj.AMask:    rIIIEncoding,
+	ADIVU & obj.AMask:   rIIIEncoding,
+	AREM & obj.AMask:    rIIIEncoding,
+	AREMU & obj.AMask:   rIIIEncoding,
+	ADIVW & obj.AMask:   rIIIEncoding,
+	ADIVUW & obj.AMask:  rIIIEncoding,
+	AREMW & obj.AMask:   rIIIEncoding,
+	AREMUW & obj.AMask:  rIIIEncoding,
+
+	// 8.2: Load-Reserved/Store-Conditional
+	ALRW & obj.AMask: rIIIEncoding,
+	ALRD & obj.AMask: rIIIEncoding,
+	ASCW & obj.AMask: rIIIEncoding,
+	ASCD & obj.AMask: rIIIEncoding,
+
+	// 8.3: Atomic Memory Operations
+	AAMOSWAPW & obj.AMask: rIIIEncoding,
+	AAMOSWAPD & obj.AMask: rIIIEncoding,
+	AAMOADDW & obj.AMask:  rIIIEncoding,
+	AAMOADDD & obj.AMask:  rIIIEncoding,
+	AAMOANDW & obj.AMask:  rIIIEncoding,
+	AAMOANDD & obj.AMask:  rIIIEncoding,
+	AAMOORW & obj.AMask:   rIIIEncoding,
+	AAMOORD & obj.AMask:   rIIIEncoding,
+	AAMOXORW & obj.AMask:  rIIIEncoding,
+	AAMOXORD & obj.AMask:  rIIIEncoding,
+	AAMOMAXW & obj.AMask:  rIIIEncoding,
+	AAMOMAXD & obj.AMask:  rIIIEncoding,
+	AAMOMAXUW & obj.AMask: rIIIEncoding,
+	AAMOMAXUD & obj.AMask: rIIIEncoding,
+	AAMOMINW & obj.AMask:  rIIIEncoding,
+	AAMOMIND & obj.AMask:  rIIIEncoding,
+	AAMOMINUW & obj.AMask: rIIIEncoding,
+	AAMOMINUD & obj.AMask: rIIIEncoding,
+
+	// 10.1: Base Counters and Timers
+	ARDCYCLE & obj.AMask:   iIEncoding,
+	ARDTIME & obj.AMask:    iIEncoding,
+	ARDINSTRET & obj.AMask: iIEncoding,
+
+	// 11.5: Single-Precision Load and Store Instructions
+	AFLW & obj.AMask: iFEncoding,
+	AFSW & obj.AMask: sFEncoding,
+
+	// 11.6: Single-Precision Floating-Point Computational Instructions
+	AFADDS & obj.AMask:   rFFFEncoding,
+	AFSUBS & obj.AMask:   rFFFEncoding,
+	AFMULS & obj.AMask:   rFFFEncoding,
+	AFDIVS & obj.AMask:   rFFFEncoding,
+	AFMINS & obj.AMask:   rFFFEncoding,
+	AFMAXS & obj.AMask:   rFFFEncoding,
+	AFSQRTS & obj.AMask:  rFFFEncoding,
+	AFMADDS & obj.AMask:  rFFFFEncoding,
+	AFMSUBS & obj.AMask:  rFFFFEncoding,
+	AFNMSUBS & obj.AMask: rFFFFEncoding,
+	AFNMADDS & obj.AMask: rFFFFEncoding,
+
+	// 11.7: Single-Precision Floating-Point Conversion and Move Instructions
+	AFCVTWS & obj.AMask:  rFIEncoding,
+	AFCVTLS & obj.AMask:  rFIEncoding,
+	AFCVTSW & obj.AMask:  rIFEncoding,
+	AFCVTSL & obj.AMask:  rIFEncoding,
+	AFCVTWUS & obj.AMask: rFIEncoding,
+	AFCVTLUS & obj.AMask: rFIEncoding,
+	AFCVTSWU & obj.AMask: rIFEncoding,
+	AFCVTSLU & obj.AMask: rIFEncoding,
+	AFSGNJS & obj.AMask:  rFFFEncoding,
+	AFSGNJNS & obj.AMask: rFFFEncoding,
+	AFSGNJXS & obj.AMask: rFFFEncoding,
+	AFMVXS & obj.AMask:   rFIEncoding,
+	AFMVSX & obj.AMask:   rIFEncoding,
+	AFMVXW & obj.AMask:   rFIEncoding,
+	AFMVWX & obj.AMask:   rIFEncoding,
+
+	// 11.8: Single-Precision Floating-Point Compare Instructions
+	AFEQS & obj.AMask: rFFIEncoding,
+	AFLTS & obj.AMask: rFFIEncoding,
+	AFLES & obj.AMask: rFFIEncoding,
+
+	// 11.9: Single-Precision Floating-Point Classify Instruction
+	AFCLASSS & obj.AMask: rFIEncoding,
+
+	// 12.3: Double-Precision Load and Store Instructions
+	AFLD & obj.AMask: iFEncoding,
+	AFSD & obj.AMask: sFEncoding,
+
+	// 12.4: Double-Precision Floating-Point Computational Instructions
+	AFADDD & obj.AMask:   rFFFEncoding,
+	AFSUBD & obj.AMask:   rFFFEncoding,
+	AFMULD & obj.AMask:   rFFFEncoding,
+	AFDIVD & obj.AMask:   rFFFEncoding,
+	AFMIND & obj.AMask:   rFFFEncoding,
+	AFMAXD & obj.AMask:   rFFFEncoding,
+	AFSQRTD & obj.AMask:  rFFFEncoding,
+	AFMADDD & obj.AMask:  rFFFFEncoding,
+	AFMSUBD & obj.AMask:  rFFFFEncoding,
+	AFNMSUBD & obj.AMask: rFFFFEncoding,
+	AFNMADDD & obj.AMask: rFFFFEncoding,
+
+	// 12.5: Double-Precision Floating-Point Conversion and Move Instructions
+	AFCVTWD & obj.AMask:  rFIEncoding,
+	AFCVTLD & obj.AMask:  rFIEncoding,
+	AFCVTDW & obj.AMask:  rIFEncoding,
+	AFCVTDL & obj.AMask:  rIFEncoding,
+	AFCVTWUD & obj.AMask: rFIEncoding,
+	AFCVTLUD & obj.AMask: rFIEncoding,
+	AFCVTDWU & obj.AMask: rIFEncoding,
+	AFCVTDLU & obj.AMask: rIFEncoding,
+	AFCVTSD & obj.AMask:  rFFEncoding,
+	AFCVTDS & obj.AMask:  rFFEncoding,
+	AFSGNJD & obj.AMask:  rFFFEncoding,
+	AFSGNJND & obj.AMask: rFFFEncoding,
+	AFSGNJXD & obj.AMask: rFFFEncoding,
+	AFMVXD & obj.AMask:   rFIEncoding,
+	AFMVDX & obj.AMask:   rIFEncoding,
+
+	// 12.6: Double-Precision Floating-Point Compare Instructions
+	AFEQD & obj.AMask: rFFIEncoding,
+	AFLTD & obj.AMask: rFFIEncoding,
+	AFLED & obj.AMask: rFFIEncoding,
+
+	// 12.7: Double-Precision Floating-Point Classify Instruction
+	AFCLASSD & obj.AMask: rFIEncoding,
+
+	// Privileged ISA
+
+	// 3.2.1: Environment Call and Breakpoint
+	AECALL & obj.AMask:  iIEncoding,
+	AEBREAK & obj.AMask: iIEncoding,
+
+	// Escape hatch
+	AWORD & obj.AMask: rawEncoding,
+
+	// Pseudo-operations. These opcodes come from package obj and are used
+	// as indices directly, without masking.
+	obj.AFUNCDATA: pseudoOpEncoding,
+	obj.APCDATA:   pseudoOpEncoding,
+	obj.ATEXT:     pseudoOpEncoding,
+	obj.ANOP:      pseudoOpEncoding,
+	obj.ADUFFZERO: pseudoOpEncoding,
+	obj.ADUFFCOPY: pseudoOpEncoding,
+	obj.APCALIGN:  pseudoOpEncoding,
+}
+
+// encodingForAs looks up the encoding for an obj.As. It returns badEncoding
+// together with an error when as is neither a RISC-V opcode nor an opcode
+// with a zero base, when its masked value lies beyond the encodings table,
+// or when the table has no entry for it (nil validate function).
+func encodingForAs(as obj.As) (encoding, error) {
+	base := as &^ obj.AMask
+	if base != 0 && base != obj.ABaseRISCV {
+		return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as)
+	}
+
+	idx := int(as & obj.AMask)
+	if idx >= len(encodings) {
+		return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as)
+	}
+
+	if enc := encodings[idx]; enc.validate != nil {
+		return enc, nil
+	}
+	return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as)
+}
+
+// instruction holds the opcode and operands of a single machine instruction
+// in the form consumed by the validate and encode functions.
+type instruction struct {
+	p      *obj.Prog // Prog that instruction is for
+	as     obj.As    // Assembler opcode
+	rd     uint32    // Destination register
+	rs1    uint32    // Source register 1
+	rs2    uint32    // Source register 2
+	rs3    uint32    // Source register 3
+	imm    int64     // Immediate
+	funct3 uint32    // Function 3
+	funct7 uint32    // Function 7 (or Function 2)
+}
+
+func (ins *instruction) String() string {
+	if ins.p == nil {
+		return ins.as.String()
+	}
+	var suffix string
+	if ins.p.As != ins.as {
+		suffix = fmt.Sprintf(" (%v)", ins.as)
+	}
+	return fmt.Sprintf("%v%v", ins.p, suffix)
+}
+
+// encode returns the machine code for the instruction. It returns an error
+// if no encoding is known for the opcode or if the opcode is a pseudo
+// instruction (an encoding whose length is not positive).
+func (ins *instruction) encode() (uint32, error) {
+	enc, err := encodingForAs(ins.as)
+	switch {
+	case err != nil:
+		return 0, err
+	case enc.length <= 0:
+		return 0, fmt.Errorf("%v: encoding called for a pseudo instruction", ins.as)
+	}
+	return enc.encode(ins), nil
+}
+
+// length returns the encoded length of the instruction as recorded in its
+// encoding, or 0 when no encoding is known for the opcode.
+func (ins *instruction) length() int {
+	if enc, err := encodingForAs(ins.as); err == nil {
+		return enc.length
+	}
+	return 0
+}
+
+// validate checks the instruction's operands with the validate function of
+// its encoding, reporting problems as diagnostics on ctxt. If no encoding is
+// known for the opcode, that lookup error is reported instead.
+func (ins *instruction) validate(ctxt *obj.Link) {
+	enc, err := encodingForAs(ins.as)
+	if err != nil {
+		// Pass the error as an argument rather than as the format string,
+		// so that a '%' in the message is not treated as a formatting verb.
+		ctxt.Diag("%v", err)
+		return
+	}
+	enc.validate(ctxt, ins)
+}
+
+// usesRegTmp reports whether REG_TMP appears as the instruction's rd, rs1
+// or rs2.
+func (ins *instruction) usesRegTmp() bool {
+	for _, r := range [...]uint32{ins.rd, ins.rs1, ins.rs2} {
+		if r == REG_TMP {
+			return true
+		}
+	}
+	return false
+}
+
+// instructionForProg builds an instruction from p using the default operand
+// mapping: rd from p.To.Reg, rs1 from p.Reg, rs2 from p.From.Reg and the
+// immediate from p.From.Offset. When p has exactly one entry in RestArgs,
+// its register becomes rs3.
+func instructionForProg(p *obj.Prog) *instruction {
+	ins := new(instruction)
+	ins.as = p.As
+	ins.rd = uint32(p.To.Reg)
+	ins.rs1 = uint32(p.Reg)
+	ins.rs2 = uint32(p.From.Reg)
+	ins.imm = p.From.Offset
+	if len(p.RestArgs) == 1 {
+		ins.rs3 = uint32(p.RestArgs[0].Reg)
+	}
+	return ins
+}
+
+// instructionsForOpImmediate returns the machine instructions for an immediate
+// operand. The instruction is specified by as and the source register is
+// specified by rs, instead of the obj.Prog.
+//
+// When the immediate (p.From.Offset) fits in 12 signed bits a single
+// instruction is returned. A slightly larger ADDI immediate is split into
+// two ADDIs, provided p does not adjust SP. Otherwise the constant is built
+// in REG_TMP with LUI (plus ADDIW when the low part is non-zero) and the
+// register form of the operation is used. A diagnostic is reported and nil
+// returned when the immediate does not fit in 32 bits or when as is not one
+// of ADDI, ANDI, ORI or XORI and therefore cannot be split.
+func instructionsForOpImmediate(p *obj.Prog, as obj.As, rs int16) []*instruction {
+	// <opi> $imm, REG, TO
+	ins := instructionForProg(p)
+	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
+
+	low, high, err := Split32BitImmediate(ins.imm)
+	if err != nil {
+		// Every argument needs a verb; an unmatched err would show up as
+		// %!(EXTRA ...) in the diagnostic.
+		p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err)
+		return nil
+	}
+	if high == 0 {
+		return []*instruction{ins}
+	}
+
+	// Split into two additions, if possible.
+	// Do not split SP-writing instructions, as otherwise the recorded SP delta may be wrong.
+	if p.Spadj == 0 && ins.as == AADDI && ins.imm >= -(1<<12) && ins.imm < 1<<12-1 {
+		imm0 := ins.imm / 2
+		imm1 := ins.imm - imm0
+
+		// ADDI $(imm/2), REG, TO
+		// ADDI $(imm-imm/2), TO, TO
+		ins.imm = imm0
+		insADDI := &instruction{as: AADDI, rd: ins.rd, rs1: ins.rd, imm: imm1}
+		return []*instruction{ins, insADDI}
+	}
+
+	// LUI $high, TMP
+	// ADDIW $low, TMP, TMP
+	// <op> TMP, REG, TO
+	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
+	insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP, imm: low}
+	switch ins.as {
+	case AADDI:
+		ins.as = AADD
+	case AANDI:
+		ins.as = AAND
+	case AORI:
+		ins.as = AOR
+	case AXORI:
+		ins.as = AXOR
+	default:
+		p.Ctxt.Diag("unsupported immediate instruction %v for splitting", p)
+		return nil
+	}
+	ins.rs2 = REG_TMP
+	if low == 0 {
+		// LUI alone produces the constant; the ADDIW would add zero.
+		return []*instruction{insLUI, ins}
+	}
+	return []*instruction{insLUI, insADDIW, ins}
+}
+
+// instructionsForLoad returns the machine instructions for a load. The load
+// opcode is given by as and the base register by rs, rather than being taken
+// from the obj.Prog; the offset comes from p.From.Offset. An offset that
+// fits in 12 signed bits yields a single load. A larger one is handled by
+// forming base+high in REG_TMP and loading from low(REG_TMP). A diagnostic
+// is reported and nil returned if the source is not a memory operand, as is
+// not a known load, or the offset does not fit in 32 bits.
+func instructionsForLoad(p *obj.Prog, as obj.As, rs int16) []*instruction {
+	if p.From.Type != obj.TYPE_MEM {
+		p.Ctxt.Diag("%v requires memory for source", p)
+		return nil
+	}
+
+	switch as {
+	case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD:
+		// Known load opcode.
+	default:
+		p.Ctxt.Diag("%v: unknown load instruction %v", p, as)
+		return nil
+	}
+
+	// <load> $imm, REG, TO (load $imm+(REG), TO)
+	load := instructionForProg(p)
+	load.as = as
+	load.rs1 = uint32(rs)
+	load.rs2 = obj.REG_NONE
+	load.imm = p.From.Offset
+
+	lo, hi, err := Split32BitImmediate(load.imm)
+	if err != nil {
+		p.Ctxt.Diag("%v: constant %d too large", p, load.imm)
+		return nil
+	}
+	if hi == 0 {
+		return []*instruction{load}
+	}
+
+	// LUI $high, TMP
+	// ADD TMP, REG, TMP
+	// <load> $low, TMP, TO
+	base := load.rs1
+	load.rs1 = REG_TMP
+	load.imm = lo
+	return []*instruction{
+		{as: ALUI, rd: REG_TMP, imm: hi},
+		{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: base},
+		load,
+	}
+}
+
+// instructionsForStore expands a store into machine instructions. The opcode
+// is given by as and the register addressing the destination by rd, rather
+// than being read from the obj.Prog; the stored register is p.From.Reg and
+// the offset is p.To.Offset. It returns nil after reporting a diagnostic if
+// the store cannot be assembled.
+func instructionsForStore(p *obj.Prog, as obj.As, rd int16) []*instruction {
+	if p.To.Type != obj.TYPE_MEM {
+		p.Ctxt.Diag("%v requires memory for destination", p)
+		return nil
+	}
+
+	// Only the machine store instructions are accepted here.
+	known := false
+	switch as {
+	case ASW, ASH, ASB, ASD, AFSW, AFSD:
+		known = true
+	}
+	if !known {
+		p.Ctxt.Diag("%v: unknown store instruction %v", p, as)
+		return nil
+	}
+
+	// <store> $imm, REG, TO (store $imm+(TO), REG)
+	store := instructionForProg(p)
+	store.as = as
+	store.rd = uint32(rd)
+	store.rs1 = uint32(p.From.Reg)
+	store.rs2 = obj.REG_NONE
+	store.imm = p.To.Offset
+
+	lo, hi, err := Split32BitImmediate(store.imm)
+	if err != nil {
+		p.Ctxt.Diag("%v: constant %d too large", p, store.imm)
+		return nil
+	}
+	if hi == 0 {
+		// The offset needs no high part; a single instruction suffices.
+		return []*instruction{store}
+	}
+
+	// Form the high part plus the destination register in TMP, then store
+	// relative to TMP using only the low part:
+	//
+	//	LUI $high, TMP
+	//	ADD TMP, TO, TMP
+	//	<store> $low, REG, TMP
+	base := store.rd
+	store.rd = REG_TMP
+	store.imm = lo
+	return []*instruction{
+		{as: ALUI, rd: REG_TMP, imm: hi},
+		{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: base},
+		store,
+	}
+}
+
+func instructionsForTLS(p *obj.Prog, ins *instruction) []*instruction {
+	insAddTP := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: REG_TP}
+
+	var inss []*instruction
+	if p.Ctxt.Flag_shared {
+		// TLS initial-exec mode - load TLS offset from GOT, add the thread pointer
+		// register, then load from or store to the resulting memory location.
+		insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
+		insLoadTLSOffset := &instruction{as: ALD, rd: REG_TMP, rs1: REG_TMP}
+		inss = []*instruction{insAUIPC, insLoadTLSOffset, insAddTP, ins}
+	} else {
+		// TLS local-exec mode - load upper TLS offset, add the lower TLS offset,
+		// add the thread pointer register, then load from or store to the resulting
+		// memory location. Note that this differs from the suggested three
+		// instruction sequence, as the Go linker does not currently have an
+		// easy way to handle relocation across 12 bytes of machine code.
+		insLUI := &instruction{as: ALUI, rd: REG_TMP}
+		insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP}
+		inss = []*instruction{insLUI, insADDIW, insAddTP, ins}
+	}
+	return inss
+}
+
+// instructionsForTLSLoad returns the machine instructions for a MOV whose
+// source is a thread local symbol. The load itself reads from offset zero
+// of TMP, which the sequence from instructionsForTLS sets up.
+func instructionsForTLSLoad(p *obj.Prog) []*instruction {
+	if sym := p.From.Sym; sym.Type != objabi.STLSBSS {
+		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, sym)
+		return nil
+	}
+
+	load := instructionForProg(p)
+	load.as = movToLoad(p.As)
+	load.rs1 = REG_TMP
+	load.rs2 = obj.REG_NONE
+	load.imm = 0
+
+	return instructionsForTLS(p, load)
+}
+
+// instructionsForTLSStore returns the machine instructions for a MOV whose
+// destination is a thread local symbol. The store itself writes p.From.Reg
+// to offset zero of TMP, which the sequence from instructionsForTLS sets up.
+func instructionsForTLSStore(p *obj.Prog) []*instruction {
+	if sym := p.To.Sym; sym.Type != objabi.STLSBSS {
+		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, sym)
+		return nil
+	}
+
+	store := instructionForProg(p)
+	store.as = movToStore(p.As)
+	store.rd = REG_TMP
+	store.rs1 = uint32(p.From.Reg)
+	store.rs2 = obj.REG_NONE
+	store.imm = 0
+
+	return instructionsForTLS(p, store)
+}
+
+// instructionsForMOV returns the machine instructions for an *obj.Prog that
+// uses a MOV pseudo-instruction. The expansion is chosen from the types of
+// the From and To operands: constant to register, register to register,
+// memory to register, register to memory, or address to register. It
+// returns nil after reporting a diagnostic for any other combination or
+// for an unsupported MOV variant.
+func instructionsForMOV(p *obj.Prog) []*instruction {
+	ins := instructionForProg(p)
+	inss := []*instruction{ins}
+
+	// MOV takes exactly two operands; a middle register is not allowed.
+	if p.Reg != 0 {
+		p.Ctxt.Diag("%v: illegal MOV instruction", p)
+		return nil
+	}
+
+	switch {
+	case p.From.Type == obj.TYPE_CONST && p.To.Type == obj.TYPE_REG:
+		// Handle constant to register moves.
+		if p.As != AMOV {
+			p.Ctxt.Diag("%v: unsupported constant load", p)
+			return nil
+		}
+
+		// For constants larger than 32 bits in size that have trailing zeros,
+		// use the value with the trailing zeros removed and then use a SLLI
+		// instruction to restore the original constant.
+		// For example:
+		// 	MOV $0x8000000000000000, X10
+		// becomes
+		// 	MOV $1, X10
+		// 	SLLI $63, X10, X10
+		var insSLLI *instruction
+		if err := immIFits(ins.imm, 32); err != nil {
+			ctz := bits.TrailingZeros64(uint64(ins.imm))
+			if err := immIFits(ins.imm>>ctz, 32); err == nil {
+				ins.imm = ins.imm >> ctz
+				insSLLI = &instruction{as: ASLLI, rd: ins.rd, rs1: ins.rd, imm: int64(ctz)}
+			}
+		}
+
+		low, high, err := Split32BitImmediate(ins.imm)
+		if err != nil {
+			p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err)
+			return nil
+		}
+
+		// MOV $c, R -> ADD $c, ZERO, R
+		ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, REG_ZERO, obj.REG_NONE, low
+
+		// LUI is only necessary if the constant does not fit in 12 bits.
+		if high != 0 {
+			// LUI top20bits(c), R
+			// ADD bottom12bits(c), R, R
+			insLUI := &instruction{as: ALUI, rd: ins.rd, imm: high}
+			inss = []*instruction{insLUI}
+			if low != 0 {
+				ins.as, ins.rs1 = AADDIW, ins.rd
+				inss = append(inss, ins)
+			}
+		}
+		if insSLLI != nil {
+			inss = append(inss, insSLLI)
+		}
+
+	case p.From.Type == obj.TYPE_CONST && p.To.Type != obj.TYPE_REG:
+		p.Ctxt.Diag("%v: constant load must target register", p)
+		return nil
+
+	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG:
+		// Handle register to register moves.
+		switch p.As {
+		case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb
+			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0
+		case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb
+			ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0
+		case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb
+			ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255
+		case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb
+			ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg)
+		case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb
+			ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg)
+		case AMOVB, AMOVH:
+			// Use SLLI/SRAI to extend.
+			// Shift left so the narrow value occupies the top bits, then
+			// shift back arithmetically by the same amount.
+			ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
+			if p.As == AMOVB {
+				ins.imm = 56
+			} else if p.As == AMOVH {
+				ins.imm = 48
+			}
+			ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
+			inss = append(inss, ins2)
+		case AMOVHU, AMOVWU:
+			// Use SLLI/SRLI to extend.
+			// Same as above, but shifting back logically.
+			ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
+			if p.As == AMOVHU {
+				ins.imm = 48
+			} else if p.As == AMOVWU {
+				ins.imm = 32
+			}
+			ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
+			inss = append(inss, ins2)
+		}
+
+	case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
+		// Memory to register loads.
+		switch p.From.Name {
+		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
+			// MOV c(Rs), Rd -> L $c, Rs, Rd
+			inss = instructionsForLoad(p, movToLoad(p.As), addrToReg(p.From))
+
+		case obj.NAME_EXTERN, obj.NAME_STATIC:
+			if p.From.Sym.Type == objabi.STLSBSS {
+				return instructionsForTLSLoad(p)
+			}
+
+			// Note that the values for $off_hi and $off_lo are currently
+			// zero and will be assigned during relocation.
+			//
+			// AUIPC $off_hi, Rd
+			// L $off_lo, Rd, Rd
+			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
+			ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), ins.rd, obj.REG_NONE, 0
+			inss = []*instruction{insAUIPC, ins}
+
+		default:
+			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
+			return nil
+		}
+
+	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
+		// Register to memory stores.
+		switch p.As {
+		case AMOVBU, AMOVHU, AMOVWU:
+			p.Ctxt.Diag("%v: unsupported unsigned store", p)
+			return nil
+		}
+		switch p.To.Name {
+		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
+			// MOV Rs, c(Rd) -> S $c, Rs, Rd
+			inss = instructionsForStore(p, movToStore(p.As), addrToReg(p.To))
+
+		case obj.NAME_EXTERN, obj.NAME_STATIC:
+			if p.To.Sym.Type == objabi.STLSBSS {
+				return instructionsForTLSStore(p)
+			}
+
+			// Note that the values for $off_hi and $off_lo are currently
+			// zero and will be assigned during relocation.
+			//
+			// AUIPC $off_hi, Rtmp
+			// S $off_lo, Rtmp, Rd
+			insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
+			ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
+			inss = []*instruction{insAUIPC, ins}
+
+		default:
+			// Report the name kind of the destination, which is the
+			// operand this switch is examining.
+			p.Ctxt.Diag("unsupported name %d for %v", p.To.Name, p)
+			return nil
+		}
+
+	case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
+		// MOV $sym+off(SP/SB), R
+		if p.As != AMOV {
+			p.Ctxt.Diag("%v: unsupported address load", p)
+			return nil
+		}
+		switch p.From.Name {
+		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
+			inss = instructionsForOpImmediate(p, AADDI, addrToReg(p.From))
+
+		case obj.NAME_EXTERN, obj.NAME_STATIC:
+			// Note that the values for $off_hi and $off_lo are currently
+			// zero and will be assigned during relocation.
+			//
+			// AUIPC $off_hi, R
+			// ADDI $off_lo, R
+			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
+			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, ins.rd, obj.REG_NONE, 0
+			inss = []*instruction{insAUIPC, ins}
+
+		default:
+			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
+			return nil
+		}
+
+	case p.From.Type == obj.TYPE_ADDR && p.To.Type != obj.TYPE_REG:
+		p.Ctxt.Diag("%v: address load must target register", p)
+		return nil
+
+	default:
+		p.Ctxt.Diag("%v: unsupported MOV", p)
+		return nil
+	}
+
+	return inss
+}
+
+// instructionsForProg returns the machine instructions for an *obj.Prog.
+//
+// It starts from the single instruction produced by instructionForProg and
+// then, depending on the opcode, either adjusts that instruction's fields,
+// appends further instructions, or replaces the list with the result of one
+// of the instructionsFor* helpers. Every instruction in the returned list
+// has its p field set to p. It returns nil after reporting a diagnostic in
+// the cases that return early below, or when a helper returns nil.
+func instructionsForProg(p *obj.Prog) []*instruction {
+	ins := instructionForProg(p)
+	inss := []*instruction{ins}
+
+	if len(p.RestArgs) > 1 {
+		p.Ctxt.Diag("too many source registers")
+		return nil
+	}
+
+	switch ins.as {
+	case AJAL, AJALR:
+		// rd comes from the From operand and rs1 from the To operand; the
+		// offset is taken from To.
+		ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE
+		ins.imm = p.To.Offset
+
+	case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
+		// Rewrite the pseudo branches in terms of BEQ/BNE/BLT/BLTU/BGE/BGEU,
+		// reordering the operands or substituting ZERO as required. Opcodes
+		// without a case below keep the fields set by instructionForProg.
+		switch ins.as {
+		case ABEQZ:
+			ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg)
+		case ABGEZ:
+			ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg)
+		case ABGT:
+			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg)
+		case ABGTU:
+			ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg)
+		case ABGTZ:
+			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO
+		case ABLE:
+			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg)
+		case ABLEU:
+			ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg)
+		case ABLEZ:
+			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO
+		case ABLTZ:
+			ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg)
+		case ABNEZ:
+			ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg)
+		}
+		ins.imm = p.To.Offset
+
+	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
+		inss = instructionsForMOV(p)
+
+	case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD:
+		inss = instructionsForLoad(p, ins.as, p.From.Reg)
+
+	case ASW, ASH, ASB, ASD, AFSW, AFSD:
+		inss = instructionsForStore(p, ins.as, p.To.Reg)
+
+	case ALRW, ALRD:
+		// Set aq to use acquire access ordering
+		ins.funct7 = 2
+		ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO
+
+	case AADDI, AANDI, AORI, AXORI:
+		// These may need to be split if the immediate is too large for a
+		// single instruction.
+		inss = instructionsForOpImmediate(p, ins.as, p.Reg)
+
+	case ASCW, ASCD:
+		// Set release access ordering
+		ins.funct7 = 1
+		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)
+
+	case AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
+		AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD:
+		// Set aqrl to use acquire & release access ordering
+		ins.funct7 = 3
+		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)
+
+	case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET:
+		// The immediate is the csr value from the opcode's encoding entry.
+		// With no destination operand the result goes to ZERO.
+		insEnc := encode(p.As)
+		if p.To.Type == obj.TYPE_NONE {
+			ins.rd = REG_ZERO
+		}
+		ins.rs1 = REG_ZERO
+		ins.imm = insEnc.csr
+
+	case AFENCE:
+		ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE
+		ins.imm = 0x0ff
+
+	case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
+		// Set the rounding mode in funct3 to round to zero.
+		ins.funct3 = 1
+
+	case AFNES, AFNED:
+		// Replace FNE[SD] with FEQ[SD] and NOT.
+		if p.To.Type != obj.TYPE_REG {
+			p.Ctxt.Diag("%v needs an integer register output", p)
+			return nil
+		}
+		if ins.as == AFNES {
+			ins.as = AFEQS
+		} else {
+			ins.as = AFEQD
+		}
+		ins2 := &instruction{
+			as:  AXORI, // [bit] xor 1 = not [bit]
+			rd:  ins.rd,
+			rs1: ins.rd,
+			imm: 1,
+		}
+		inss = append(inss, ins2)
+
+	case AFSQRTS, AFSQRTD:
+		// These instructions expect a zero (i.e. float register 0)
+		// to be the second input operand.
+		ins.rs1 = uint32(p.From.Reg)
+		ins.rs2 = REG_F0
+
+	case AFMADDS, AFMSUBS, AFNMADDS, AFNMSUBS,
+		AFMADDD, AFMSUBD, AFNMADDD, AFNMSUBD:
+		// Swap the first two operands so that the operands are in the same
+		// order as they are in the specification: RS1, RS2, RS3, RD.
+		ins.rs1, ins.rs2 = ins.rs2, ins.rs1
+
+	case ANEG, ANEGW:
+		// NEG rs, rd -> SUB rs, X0, rd
+		ins.as = ASUB
+		if p.As == ANEGW {
+			ins.as = ASUBW
+		}
+		ins.rs1 = REG_ZERO
+		// With no destination given, negate the source register in place.
+		if ins.rd == obj.REG_NONE {
+			ins.rd = ins.rs2
+		}
+
+	case ANOT:
+		// NOT rs, rd -> XORI $-1, rs, rd
+		ins.as = AXORI
+		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
+		// With no destination given, invert the source register in place.
+		if ins.rd == obj.REG_NONE {
+			ins.rd = ins.rs1
+		}
+		ins.imm = -1
+
+	case ASEQZ:
+		// SEQZ rs, rd -> SLTIU $1, rs, rd
+		ins.as = ASLTIU
+		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
+		ins.imm = 1
+
+	case ASNEZ:
+		// SNEZ rs, rd -> SLTU rs, x0, rd
+		ins.as = ASLTU
+		ins.rs1 = REG_ZERO
+
+	case AFABSS:
+		// FABSS rs, rd -> FSGNJXS rs, rs, rd
+		ins.as = AFSGNJXS
+		ins.rs1 = uint32(p.From.Reg)
+
+	case AFABSD:
+		// FABSD rs, rd -> FSGNJXD rs, rs, rd
+		ins.as = AFSGNJXD
+		ins.rs1 = uint32(p.From.Reg)
+
+	case AFNEGS:
+		// FNEGS rs, rd -> FSGNJNS rs, rs, rd
+		ins.as = AFSGNJNS
+		ins.rs1 = uint32(p.From.Reg)
+
+	case AFNEGD:
+		// FNEGD rs, rd -> FSGNJND rs, rs, rd
+		ins.as = AFSGNJND
+		ins.rs1 = uint32(p.From.Reg)
+
+	case ASLLI, ASRLI, ASRAI:
+		// The diagnostic is reported but the instruction is still returned.
+		if ins.imm < 0 || ins.imm > 63 {
+			p.Ctxt.Diag("%v: shift amount out of range 0 to 63", p)
+		}
+
+	case ASLLIW, ASRLIW, ASRAIW:
+		// The diagnostic is reported but the instruction is still returned.
+		if ins.imm < 0 || ins.imm > 31 {
+			p.Ctxt.Diag("%v: shift amount out of range 0 to 31", p)
+		}
+	}
+
+	// Instructions created above or by the helpers do not have their Prog
+	// set yet; tie every instruction back to p.
+	for _, ins := range inss {
+		ins.p = p
+	}
+
+	return inss
+}
+
+// assemble emits machine code.
+// It is called at the very end of the assembly process.
+//
+// For each Prog of cursym it first adds any relocation the Prog needs, then
+// writes the encodings of the instructions returned by instructionsForProg
+// starting at p.Pc. Progs whose instructions use TMP are marked with
+// USES_REG_TMP, and unsafe points are marked once all Progs are processed.
+func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
+	if ctxt.Retpoline {
+		ctxt.Diag("-spectre=ret not supported on riscv")
+		ctxt.Retpoline = false // don't keep printing
+	}
+
+	// If errors were encountered during preprocess/validation, proceeding
+	// and attempting to encode said instructions will only lead to panics.
+	if ctxt.Errors > 0 {
+		return
+	}
+
+	for p := cursym.Func().Text; p != nil; p = p.Link {
+		switch p.As {
+		case AJAL:
+			// Add a 4 byte R_RISCV_JAL relocation against the target
+			// symbol for jumps that were marked as needing one.
+			if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
+				rel := obj.Addrel(cursym)
+				rel.Off = int32(p.Pc)
+				rel.Siz = 4
+				rel.Sym = p.To.Sym
+				rel.Add = p.To.Offset
+				rel.Type = objabi.R_RISCV_JAL
+			}
+		case AJALR:
+			if p.To.Sym != nil {
+				ctxt.Diag("%v: unexpected AJALR with to symbol", p)
+			}
+
+		case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
+			// Pick the relocation type and the operand carrying the symbol
+			// from the mark bits on the Prog.
+			var addr *obj.Addr
+			var rt objabi.RelocType
+			if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC {
+				rt = objabi.R_RISCV_CALL
+				addr = &p.From
+			} else if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC {
+				rt = objabi.R_RISCV_PCREL_ITYPE
+				addr = &p.From
+			} else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC {
+				rt = objabi.R_RISCV_PCREL_STYPE
+				addr = &p.To
+			} else {
+				// No relocation needed. This break leaves the switch only;
+				// the Prog is still encoded below.
+				break
+			}
+			if p.As == AAUIPC {
+				if p.Link == nil {
+					ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction")
+					break
+				}
+				// For AUIPC the symbol is carried in the first extra operand.
+				// NOTE(review): assumes p.RestArgs is non-empty for a marked
+				// AUIPC - confirm against the code that sets the mark.
+				addr = &p.RestArgs[0].Addr
+			}
+			if addr.Sym == nil {
+				ctxt.Diag("PC-relative relocation missing symbol")
+				break
+			}
+			// Thread local symbols override the relocation type chosen above.
+			if addr.Sym.Type == objabi.STLSBSS {
+				if ctxt.Flag_shared {
+					rt = objabi.R_RISCV_TLS_IE
+				} else {
+					rt = objabi.R_RISCV_TLS_LE
+				}
+			}
+
+			// The relocation covers 8 bytes starting at p.Pc.
+			rel := obj.Addrel(cursym)
+			rel.Off = int32(p.Pc)
+			rel.Siz = 8
+			rel.Sym = addr.Sym
+			rel.Add = addr.Offset
+			rel.Type = rt
+
+		case obj.APCALIGN:
+			// Fill the alignment padding with 4 byte NOPs. PCALIGN itself
+			// has no encoding, hence the continue.
+			alignedValue := p.From.Offset
+			v := pcAlignPadLength(p.Pc, alignedValue)
+			offset := p.Pc
+			for ; v >= 4; v -= 4 {
+				// NOP
+				cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0})
+				offset += 4
+			}
+			continue
+		}
+
+		offset := p.Pc
+		for _, ins := range instructionsForProg(p) {
+			// An instruction that fails to encode is not written and does
+			// not advance the offset.
+			// NOTE(review): the encode error is dropped here; presumably it
+			// has already been reported elsewhere - confirm.
+			if ic, err := ins.encode(); err == nil {
+				cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
+				offset += int64(ins.length())
+			}
+			if ins.usesRegTmp() {
+				p.Mark |= USES_REG_TMP
+			}
+		}
+	}
+
+	obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil)
+}
+
+// isUnsafePoint reports whether p involves the TMP register, either because
+// it carries the USES_REG_TMP mark or because one of its register operands
+// is REG_TMP.
+func isUnsafePoint(p *obj.Prog) bool {
+	if p.Mark&USES_REG_TMP == USES_REG_TMP {
+		return true
+	}
+	if p.From.Reg == REG_TMP {
+		return true
+	}
+	if p.To.Reg == REG_TMP {
+		return true
+	}
+	return p.Reg == REG_TMP
+}
+
+// LinkRISCV64 is the obj.LinkArch for sys.ArchRISCV64. It registers this
+// package's buildop, preprocess, assemble, progedit and unaryDst, together
+// with the DWARF register table.
+var LinkRISCV64 = obj.LinkArch{
+	Arch:           sys.ArchRISCV64,
+	Init:           buildop,
+	Preprocess:     preprocess,
+	Assemble:       assemble,
+	Progedit:       progedit,
+	UnaryDst:       unaryDst,
+	DWARFRegisters: RISCV64DWARFRegisters,
+}
diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go
new file mode 100644
index 0000000..843398d
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.go
@@ -0,0 +1,133 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build riscv64
+// +build riscv64
+
+package testbranch
+
+import (
+	"testing"
+)
+
+// The functions below are declared without bodies; they are implemented in
+// assembly in branch_test.s. Each one reports whether the named branch
+// instruction takes its branch for the given operand(s).
+func testBEQZ(a int64) (r bool)
+func testBGE(a, b int64) (r bool)
+func testBGEU(a, b int64) (r bool)
+func testBGEZ(a int64) (r bool)
+func testBGT(a, b int64) (r bool)
+func testBGTU(a, b int64) (r bool)
+func testBGTZ(a int64) (r bool)
+func testBLE(a, b int64) (r bool)
+func testBLEU(a, b int64) (r bool)
+func testBLEZ(a int64) (r bool)
+func testBLT(a, b int64) (r bool)
+func testBLTU(a, b int64) (r bool)
+func testBLTZ(a int64) (r bool)
+func testBNEZ(a int64) (r bool)
+
+// The testGo* functions are the pure Go counterparts of the two-operand
+// assembly tests. The ones ending in U compare their operands as unsigned
+// 64 bit values.
+
+func testGoBGE(a, b int64) bool {
+	return a >= b
+}
+
+func testGoBGEU(a, b int64) bool {
+	ua, ub := uint64(a), uint64(b)
+	return ua >= ub
+}
+
+func testGoBGT(a, b int64) bool {
+	return a > b
+}
+
+func testGoBGTU(a, b int64) bool {
+	ua, ub := uint64(a), uint64(b)
+	return ua > ub
+}
+
+func testGoBLE(a, b int64) bool {
+	return a <= b
+}
+
+func testGoBLEU(a, b int64) bool {
+	ua, ub := uint64(a), uint64(b)
+	return ua <= ub
+}
+
+func testGoBLT(a, b int64) bool {
+	return a < b
+}
+
+func testGoBLTU(a, b int64) bool {
+	ua, ub := uint64(a), uint64(b)
+	return ua < ub
+}
+
+// TestBranchCondition runs each two-operand branch instruction over a table
+// of operand pairs. Every entry is evaluated by both the assembly routine
+// and its Go counterpart, and each result is compared against want.
+func TestBranchCondition(t *testing.T) {
+	cases := []struct {
+		ins      string
+		a, b     int64
+		fn, goFn func(a, b int64) bool
+		want     bool
+	}{
+		{"BGE", 0, 1, testBGE, testGoBGE, false},
+		{"BGE", 0, 0, testBGE, testGoBGE, true},
+		{"BGE", 0, -1, testBGE, testGoBGE, true},
+		{"BGE", -1, 0, testBGE, testGoBGE, false},
+		{"BGE", 1, 0, testBGE, testGoBGE, true},
+		{"BGEU", 0, 1, testBGEU, testGoBGEU, false},
+		{"BGEU", 0, 0, testBGEU, testGoBGEU, true},
+		{"BGEU", 0, -1, testBGEU, testGoBGEU, false},
+		{"BGEU", -1, 0, testBGEU, testGoBGEU, true},
+		{"BGEU", 1, 0, testBGEU, testGoBGEU, true},
+		{"BGT", 0, 1, testBGT, testGoBGT, false},
+		{"BGT", 0, 0, testBGT, testGoBGT, false},
+		{"BGT", 0, -1, testBGT, testGoBGT, true},
+		{"BGT", -1, 0, testBGT, testGoBGT, false},
+		{"BGT", 1, 0, testBGT, testGoBGT, true},
+		{"BGTU", 0, 1, testBGTU, testGoBGTU, false},
+		{"BGTU", 0, 0, testBGTU, testGoBGTU, false},
+		{"BGTU", 0, -1, testBGTU, testGoBGTU, false},
+		{"BGTU", -1, 0, testBGTU, testGoBGTU, true},
+		{"BGTU", 1, 0, testBGTU, testGoBGTU, true},
+		{"BLE", 0, 1, testBLE, testGoBLE, true},
+		{"BLE", 0, 0, testBLE, testGoBLE, true},
+		{"BLE", 0, -1, testBLE, testGoBLE, false},
+		{"BLE", -1, 0, testBLE, testGoBLE, true},
+		{"BLE", 1, 0, testBLE, testGoBLE, false},
+		{"BLEU", 0, 1, testBLEU, testGoBLEU, true},
+		{"BLEU", 0, 0, testBLEU, testGoBLEU, true},
+		{"BLEU", 0, -1, testBLEU, testGoBLEU, true},
+		{"BLEU", -1, 0, testBLEU, testGoBLEU, false},
+		{"BLEU", 1, 0, testBLEU, testGoBLEU, false},
+		{"BLT", 0, 1, testBLT, testGoBLT, true},
+		{"BLT", 0, 0, testBLT, testGoBLT, false},
+		{"BLT", 0, -1, testBLT, testGoBLT, false},
+		{"BLT", -1, 0, testBLT, testGoBLT, true},
+		{"BLT", 1, 0, testBLT, testGoBLT, false},
+		{"BLTU", 0, 1, testBLTU, testGoBLTU, true},
+		{"BLTU", 0, 0, testBLTU, testGoBLTU, false},
+		{"BLTU", 0, -1, testBLTU, testGoBLTU, true},
+		{"BLTU", -1, 0, testBLTU, testGoBLTU, false},
+		{"BLTU", 1, 0, testBLTU, testGoBLTU, false},
+	}
+	for _, tc := range cases {
+		t.Run(tc.ins, func(t *testing.T) {
+			// Check the assembly implementation first, then the Go one.
+			asmGot := tc.fn(tc.a, tc.b)
+			if asmGot != tc.want {
+				t.Errorf("Assembly %v %v, %v = %v, want %v", tc.ins, tc.a, tc.b, asmGot, tc.want)
+			}
+			goGot := tc.goFn(tc.a, tc.b)
+			if goGot != tc.want {
+				t.Errorf("Go %v %v, %v = %v, want %v", tc.ins, tc.a, tc.b, goGot, tc.want)
+			}
+		})
+	}
+}
+
+func TestBranchZero(t *testing.T) {
+	tests := []struct {
+		ins  string
+		a    int64
+		fn   func(a int64) bool
+		want bool
+	}{
+		{"BEQZ", -1, testBEQZ, false},
+		{"BEQZ", 0, testBEQZ, true},
+		{"BEQZ", 1, testBEQZ, false},
+		{"BGEZ", -1, testBGEZ, false},
+		{"BGEZ", 0, testBGEZ, true},
+		{"BGEZ", 1, testBGEZ, true},
+		{"BGTZ", -1, testBGTZ, false},
+		{"BGTZ", 0, testBGTZ, false},
+		{"BGTZ", 1, testBGTZ, true},
+		{"BLEZ", -1, testBLEZ, true},
+		{"BLEZ", 0, testBLEZ, true},
+		{"BLEZ", 1, testBLEZ, false},
+		{"BLTZ", -1, testBLTZ, true},
+		{"BLTZ", 0, testBLTZ, false},
+		{"BLTZ", 1, testBLTZ, false},
+		{"BNEZ", -1, testBNEZ, true},
+		{"BNEZ", 0, testBNEZ, false},
+		{"BNEZ", 1, testBNEZ, true},
+	}
+	for _, test := range tests {
+		t.Run(test.ins, func(t *testing.T) {
+			if got := test.fn(test.a); got != test.want {
+				t.Errorf("%v %v = %v, want %v", test.ins, test.a, got, test.want)
+			}
+		})
+	}
+}
diff --git a/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s
new file mode 100644
index 0000000..d7141e3
--- /dev/null
+++ b/src/cmd/internal/obj/riscv/testdata/testbranch/branch_test.s
@@ -0,0 +1,156 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build riscv64
+// +build riscv64
+
+#include "textflag.h"
+
+// func testBEQZ(a int64) (r bool)
+//
+// Stores 1 to r if BEQZ takes the branch with X5 = a, and 0 otherwise.
+TEXT ·testBEQZ(SB),NOSPLIT,$0-9
+	MOV	a+0(FP), X5
+	// Start with the "branch taken" result.
+	MOV	$1, X6
+	BEQZ	X5, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X6
+b:
+	MOV	X6, r+8(FP)
+	RET
+
+// func testBGE(a, b int64) (r bool)
+//
+// Stores 1 to r if BGE takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBGE(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BGE	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBGEU(a, b int64) (r bool)
+//
+// Stores 1 to r if BGEU takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBGEU(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BGEU	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBGEZ(a int64) (r bool)
+//
+// Stores 1 to r if BGEZ takes the branch with X5 = a, and 0 otherwise.
+TEXT ·testBGEZ(SB),NOSPLIT,$0-9
+	MOV	a+0(FP), X5
+	// Start with the "branch taken" result.
+	MOV	$1, X6
+	BGEZ	X5, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X6
+b:
+	MOV	X6, r+8(FP)
+	RET
+
+// func testBGT(a, b int64) (r bool)
+//
+// Stores 1 to r if BGT takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBGT(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BGT	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBGTU(a, b int64) (r bool)
+//
+// Stores 1 to r if BGTU takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBGTU(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BGTU	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBGTZ(a int64) (r bool)
+//
+// Stores 1 to r if BGTZ takes the branch with X5 = a, and 0 otherwise.
+TEXT ·testBGTZ(SB),NOSPLIT,$0-9
+	MOV	a+0(FP), X5
+	// Start with the "branch taken" result.
+	MOV	$1, X6
+	BGTZ	X5, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X6
+b:
+	MOV	X6, r+8(FP)
+	RET
+
+// func testBLE(a, b int64) (r bool)
+//
+// Stores 1 to r if BLE takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBLE(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BLE	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBLEU(a, b int64) (r bool)
+//
+// Stores 1 to r if BLEU takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBLEU(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BLEU	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBLEZ(a int64) (r bool)
+//
+// Stores 1 to r if BLEZ takes the branch with X5 = a, and 0 otherwise.
+TEXT ·testBLEZ(SB),NOSPLIT,$0-9
+	MOV	a+0(FP), X5
+	// Start with the "branch taken" result.
+	MOV	$1, X6
+	BLEZ	X5, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X6
+b:
+	MOV	X6, r+8(FP)
+	RET
+
+// func testBLT(a, b int64) (r bool)
+//
+// Stores 1 to r if BLT takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBLT(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BLT	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBLTU(a, b int64) (r bool)
+//
+// Stores 1 to r if BLTU takes the branch with X5 = a and X6 = b, and 0
+// otherwise.
+TEXT ·testBLTU(SB),NOSPLIT,$0-17
+	MOV	a+0(FP), X5
+	MOV	b+8(FP), X6
+	// Start with the "branch taken" result.
+	MOV	$1, X7
+	BLTU	X5, X6, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X7
+b:
+	MOV	X7, r+16(FP)
+	RET
+
+// func testBLTZ(a int64) (r bool)
+//
+// Stores 1 to r if BLTZ takes the branch with X5 = a, and 0 otherwise.
+TEXT ·testBLTZ(SB),NOSPLIT,$0-9
+	MOV	a+0(FP), X5
+	// Start with the "branch taken" result.
+	MOV	$1, X6
+	BLTZ	X5, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X6
+b:
+	MOV	X6, r+8(FP)
+	RET
+
+// func testBNEZ(a int64) (r bool)
+//
+// Stores 1 to r if BNEZ takes the branch with X5 = a, and 0 otherwise.
+TEXT ·testBNEZ(SB),NOSPLIT,$0-9
+	MOV	a+0(FP), X5
+	// Start with the "branch taken" result.
+	MOV	$1, X6
+	BNEZ	X5, b
+	// Only reached when the branch is not taken.
+	MOV	$0, X6
+b:
+	MOV	X6, r+8(FP)
+	RET