summaryrefslogtreecommitdiffstats
path: root/src/internal/pkgbits
diff options
context:
space:
mode:
Diffstat (limited to 'src/internal/pkgbits')
-rw-r--r--src/internal/pkgbits/codes.go77
-rw-r--r--src/internal/pkgbits/decoder.go515
-rw-r--r--src/internal/pkgbits/doc.go30
-rw-r--r--src/internal/pkgbits/encoder.go394
-rw-r--r--src/internal/pkgbits/flags.go9
-rw-r--r--src/internal/pkgbits/reloc.go42
-rw-r--r--src/internal/pkgbits/support.go17
-rw-r--r--src/internal/pkgbits/sync.go136
-rw-r--r--src/internal/pkgbits/syncmarker_string.go89
9 files changed, 1309 insertions, 0 deletions
diff --git a/src/internal/pkgbits/codes.go b/src/internal/pkgbits/codes.go
new file mode 100644
index 0000000..f0cabde
--- /dev/null
+++ b/src/internal/pkgbits/codes.go
@@ -0,0 +1,77 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+// A Code is an enum value that can be encoded into bitstreams.
+//
+// Code types are preferable for enum types, because they allow
+// Decoder to detect desyncs.
+type Code interface {
+ // Marker returns the SyncMarker for the Code's dynamic type.
+ Marker() SyncMarker
+
+ // Value returns the Code's ordinal value.
+ Value() int
+}
+
+// A CodeVal distinguishes among go/constant.Value encodings.
+type CodeVal int
+
+func (c CodeVal) Marker() SyncMarker { return SyncVal }
+func (c CodeVal) Value() int { return int(c) }
+
+// Note: These values are public and cannot be changed without
+// updating the go/types importers.
+
+const (
+ ValBool CodeVal = iota
+ ValString
+ ValInt64
+ ValBigInt
+ ValBigRat
+ ValBigFloat
+)
+
+// A CodeType distinguishes among go/types.Type encodings.
+type CodeType int
+
+func (c CodeType) Marker() SyncMarker { return SyncType }
+func (c CodeType) Value() int { return int(c) }
+
+// Note: These values are public and cannot be changed without
+// updating the go/types importers.
+
+const (
+ TypeBasic CodeType = iota
+ TypeNamed
+ TypePointer
+ TypeSlice
+ TypeArray
+ TypeChan
+ TypeMap
+ TypeSignature
+ TypeStruct
+ TypeInterface
+ TypeUnion
+ TypeTypeParam
+)
+
+// A CodeObj distinguishes among go/types.Object encodings.
+type CodeObj int
+
+func (c CodeObj) Marker() SyncMarker { return SyncCodeObj }
+func (c CodeObj) Value() int { return int(c) }
+
+// Note: These values are public and cannot be changed without
+// updating the go/types importers.
+
+const (
+ ObjAlias CodeObj = iota
+ ObjConst
+ ObjType
+ ObjFunc
+ ObjVar
+ ObjStub
+)
diff --git a/src/internal/pkgbits/decoder.go b/src/internal/pkgbits/decoder.go
new file mode 100644
index 0000000..4fe024d
--- /dev/null
+++ b/src/internal/pkgbits/decoder.go
@@ -0,0 +1,515 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+import (
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "go/constant"
+ "go/token"
+ "io"
+ "math/big"
+ "os"
+ "runtime"
+ "strings"
+)
+
+// A PkgDecoder provides methods for decoding a package's Unified IR
+// export data.
+type PkgDecoder struct {
+ // version is the file format version.
+ version uint32
+
+ // sync indicates whether the file uses sync markers.
+ sync bool
+
+ // pkgPath is the package path for the package to be decoded.
+ //
+ // TODO(mdempsky): Remove; unneeded since CL 391014.
+ pkgPath string
+
+ // elemData is the full data payload of the encoded package.
+ // Elements are densely and contiguously packed together.
+ //
+ // The last 8 bytes of elemData are the package fingerprint.
+ elemData string
+
+ // elemEnds stores the byte-offset end positions of element
+ // bitstreams within elemData.
+ //
+ // For example, element I's bitstream data starts at elemEnds[I-1]
+ // (or 0, if I==0) and ends at elemEnds[I].
+ //
+ // Note: elemEnds is indexed by absolute indices, not
+ // section-relative indices.
+ elemEnds []uint32
+
+ // elemEndsEnds stores the index-offset end positions of relocation
+ // sections within elemEnds.
+ //
+ // For example, section K's end positions start at elemEndsEnds[K-1]
+ // (or 0, if K==0) and end at elemEndsEnds[K].
+ elemEndsEnds [numRelocs]uint32
+
+ scratchRelocEnt []RelocEnt
+}
+
+// PkgPath returns the package path for the package
+//
+// TODO(mdempsky): Remove; unneeded since CL 391014.
+func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
+
+// SyncMarkers reports whether pr uses sync markers.
+func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
+
+// NewPkgDecoder returns a PkgDecoder initialized to read the Unified
+// IR export data from input. pkgPath is the package path for the
+// compilation unit that produced the export data.
+//
+// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
+func NewPkgDecoder(pkgPath, input string) PkgDecoder {
+ pr := PkgDecoder{
+ pkgPath: pkgPath,
+ }
+
+ // TODO(mdempsky): Implement direct indexing of input string to
+ // avoid copying the position information.
+
+ r := strings.NewReader(input)
+
+ assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
+
+ switch pr.version {
+ default:
+ panic(fmt.Errorf("unsupported version: %v", pr.version))
+ case 0:
+ // no flags
+ case 1:
+ var flags uint32
+ assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
+ pr.sync = flags&flagSyncMarkers != 0
+ }
+
+ assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
+
+ pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
+ assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
+
+ pos, err := r.Seek(0, io.SeekCurrent)
+ assert(err == nil)
+
+ pr.elemData = input[pos:]
+ assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
+
+ return pr
+}
+
+// NumElems returns the number of elements in section k.
+func (pr *PkgDecoder) NumElems(k RelocKind) int {
+ count := int(pr.elemEndsEnds[k])
+ if k > 0 {
+ count -= int(pr.elemEndsEnds[k-1])
+ }
+ return count
+}
+
+// TotalElems returns the total number of elements across all sections.
+func (pr *PkgDecoder) TotalElems() int {
+ return len(pr.elemEnds)
+}
+
+// Fingerprint returns the package fingerprint.
+func (pr *PkgDecoder) Fingerprint() [8]byte {
+ var fp [8]byte
+ copy(fp[:], pr.elemData[len(pr.elemData)-8:])
+ return fp
+}
+
+// AbsIdx returns the absolute index for the given (section, index)
+// pair.
+func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
+ absIdx := int(idx)
+ if k > 0 {
+ absIdx += int(pr.elemEndsEnds[k-1])
+ }
+ if absIdx >= int(pr.elemEndsEnds[k]) {
+ errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
+ }
+ return absIdx
+}
+
+// DataIdx returns the raw element bitstream for the given (section,
+// index) pair.
+func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
+ absIdx := pr.AbsIdx(k, idx)
+
+ var start uint32
+ if absIdx > 0 {
+ start = pr.elemEnds[absIdx-1]
+ }
+ end := pr.elemEnds[absIdx]
+
+ return pr.elemData[start:end]
+}
+
+// StringIdx returns the string value for the given string index.
+func (pr *PkgDecoder) StringIdx(idx Index) string {
+ return pr.DataIdx(RelocString, idx)
+}
+
+// NewDecoder returns a Decoder for the given (section, index) pair,
+// and decodes the given SyncMarker from the element bitstream.
+func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
+ r := pr.NewDecoderRaw(k, idx)
+ r.Sync(marker)
+ return r
+}
+
+// TempDecoder returns a Decoder for the given (section, index) pair,
+// and decodes the given SyncMarker from the element bitstream.
+// If possible the Decoder should be RetireDecoder'd when it is no longer
+// needed, this will avoid heap allocations.
+func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
+ r := pr.TempDecoderRaw(k, idx)
+ r.Sync(marker)
+ return r
+}
+
+func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
+ pr.scratchRelocEnt = d.Relocs
+ d.Relocs = nil
+}
+
+// NewDecoderRaw returns a Decoder for the given (section, index) pair.
+//
+// Most callers should use NewDecoder instead.
+func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
+ r := Decoder{
+ common: pr,
+ k: k,
+ Idx: idx,
+ }
+
+ r.Data.Reset(pr.DataIdx(k, idx))
+ r.Sync(SyncRelocs)
+ r.Relocs = make([]RelocEnt, r.Len())
+ for i := range r.Relocs {
+ r.Sync(SyncReloc)
+ r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
+ }
+
+ return r
+}
+
+func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
+ r := Decoder{
+ common: pr,
+ k: k,
+ Idx: idx,
+ }
+
+ r.Data.Reset(pr.DataIdx(k, idx))
+ r.Sync(SyncRelocs)
+ l := r.Len()
+ if cap(pr.scratchRelocEnt) >= l {
+ r.Relocs = pr.scratchRelocEnt[:l]
+ pr.scratchRelocEnt = nil
+ } else {
+ r.Relocs = make([]RelocEnt, l)
+ }
+ for i := range r.Relocs {
+ r.Sync(SyncReloc)
+ r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
+ }
+
+ return r
+}
+
+// A Decoder provides methods for decoding an individual element's
+// bitstream data.
+type Decoder struct {
+ common *PkgDecoder
+
+ Relocs []RelocEnt
+ Data strings.Reader
+
+ k RelocKind
+ Idx Index
+}
+
+func (r *Decoder) checkErr(err error) {
+ if err != nil {
+ errorf("unexpected decoding error: %w", err)
+ }
+}
+
+func (r *Decoder) rawUvarint() uint64 {
+ x, err := readUvarint(&r.Data)
+ r.checkErr(err)
+ return x
+}
+
+// readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
+// This avoids the interface conversion and thus has better escape properties,
+// which flows up the stack.
+func readUvarint(r *strings.Reader) (uint64, error) {
+ var x uint64
+ var s uint
+ for i := 0; i < binary.MaxVarintLen64; i++ {
+ b, err := r.ReadByte()
+ if err != nil {
+ if i > 0 && err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ return x, err
+ }
+ if b < 0x80 {
+ if i == binary.MaxVarintLen64-1 && b > 1 {
+ return x, overflow
+ }
+ return x | uint64(b)<<s, nil
+ }
+ x |= uint64(b&0x7f) << s
+ s += 7
+ }
+ return x, overflow
+}
+
+var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
+
+func (r *Decoder) rawVarint() int64 {
+ ux := r.rawUvarint()
+
+ // Zig-zag decode.
+ x := int64(ux >> 1)
+ if ux&1 != 0 {
+ x = ^x
+ }
+ return x
+}
+
+func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
+ e := r.Relocs[idx]
+ assert(e.Kind == k)
+ return e.Idx
+}
+
+// Sync decodes a sync marker from the element bitstream and asserts
+// that it matches the expected marker.
+//
+// If EnableSync is false, then Sync is a no-op.
+func (r *Decoder) Sync(mWant SyncMarker) {
+ if !r.common.sync {
+ return
+ }
+
+ pos, _ := r.Data.Seek(0, io.SeekCurrent)
+ mHave := SyncMarker(r.rawUvarint())
+ writerPCs := make([]int, r.rawUvarint())
+ for i := range writerPCs {
+ writerPCs[i] = int(r.rawUvarint())
+ }
+
+ if mHave == mWant {
+ return
+ }
+
+ // There's some tension here between printing:
+ //
+ // (1) full file paths that tools can recognize (e.g., so emacs
+ // hyperlinks the "file:line" text for easy navigation), or
+ //
+ // (2) short file paths that are easier for humans to read (e.g., by
+ // omitting redundant or irrelevant details, so it's easier to
+ // focus on the useful bits that remain).
+ //
+ // The current formatting favors the former, as it seems more
+ // helpful in practice. But perhaps the formatting could be improved
+ // to better address both concerns. For example, use relative file
+ // paths if they would be shorter, or rewrite file paths to contain
+ // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
+ // to reliably expand that again.
+
+ fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
+
+ fmt.Printf("\nfound %v, written at:\n", mHave)
+ if len(writerPCs) == 0 {
+ fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
+ }
+ for _, pc := range writerPCs {
+ fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
+ }
+
+ fmt.Printf("\nexpected %v, reading at:\n", mWant)
+ var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
+ n := runtime.Callers(2, readerPCs[:])
+ for _, pc := range fmtFrames(readerPCs[:n]...) {
+ fmt.Printf("\t%s\n", pc)
+ }
+
+ // We already printed a stack trace for the reader, so now we can
+ // simply exit. Printing a second one with panic or base.Fatalf
+ // would just be noise.
+ os.Exit(1)
+}
+
+// Bool decodes and returns a bool value from the element bitstream.
+func (r *Decoder) Bool() bool {
+ r.Sync(SyncBool)
+ x, err := r.Data.ReadByte()
+ r.checkErr(err)
+ assert(x < 2)
+ return x != 0
+}
+
+// Int64 decodes and returns an int64 value from the element bitstream.
+func (r *Decoder) Int64() int64 {
+ r.Sync(SyncInt64)
+ return r.rawVarint()
+}
+
+// Int64 decodes and returns a uint64 value from the element bitstream.
+func (r *Decoder) Uint64() uint64 {
+ r.Sync(SyncUint64)
+ return r.rawUvarint()
+}
+
+// Len decodes and returns a non-negative int value from the element bitstream.
+func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
+
+// Int decodes and returns an int value from the element bitstream.
+func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
+
+// Uint decodes and returns a uint value from the element bitstream.
+func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
+
+// Code decodes a Code value from the element bitstream and returns
+// its ordinal value. It's the caller's responsibility to convert the
+// result to an appropriate Code type.
+//
+// TODO(mdempsky): Ideally this method would have signature "Code[T
+// Code] T" instead, but we don't allow generic methods and the
+// compiler can't depend on generics yet anyway.
+func (r *Decoder) Code(mark SyncMarker) int {
+ r.Sync(mark)
+ return r.Len()
+}
+
+// Reloc decodes a relocation of expected section k from the element
+// bitstream and returns an index to the referenced element.
+func (r *Decoder) Reloc(k RelocKind) Index {
+ r.Sync(SyncUseReloc)
+ return r.rawReloc(k, r.Len())
+}
+
+// String decodes and returns a string value from the element
+// bitstream.
+func (r *Decoder) String() string {
+ r.Sync(SyncString)
+ return r.common.StringIdx(r.Reloc(RelocString))
+}
+
+// Strings decodes and returns a variable-length slice of strings from
+// the element bitstream.
+func (r *Decoder) Strings() []string {
+ res := make([]string, r.Len())
+ for i := range res {
+ res[i] = r.String()
+ }
+ return res
+}
+
+// Value decodes and returns a constant.Value from the element
+// bitstream.
+func (r *Decoder) Value() constant.Value {
+ r.Sync(SyncValue)
+ isComplex := r.Bool()
+ val := r.scalar()
+ if isComplex {
+ val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
+ }
+ return val
+}
+
+func (r *Decoder) scalar() constant.Value {
+ switch tag := CodeVal(r.Code(SyncVal)); tag {
+ default:
+ panic(fmt.Errorf("unexpected scalar tag: %v", tag))
+
+ case ValBool:
+ return constant.MakeBool(r.Bool())
+ case ValString:
+ return constant.MakeString(r.String())
+ case ValInt64:
+ return constant.MakeInt64(r.Int64())
+ case ValBigInt:
+ return constant.Make(r.bigInt())
+ case ValBigRat:
+ num := r.bigInt()
+ denom := r.bigInt()
+ return constant.Make(new(big.Rat).SetFrac(num, denom))
+ case ValBigFloat:
+ return constant.Make(r.bigFloat())
+ }
+}
+
+func (r *Decoder) bigInt() *big.Int {
+ v := new(big.Int).SetBytes([]byte(r.String()))
+ if r.Bool() {
+ v.Neg(v)
+ }
+ return v
+}
+
+func (r *Decoder) bigFloat() *big.Float {
+ v := new(big.Float).SetPrec(512)
+ assert(v.UnmarshalText([]byte(r.String())) == nil)
+ return v
+}
+
+// @@@ Helpers
+
+// TODO(mdempsky): These should probably be removed. I think they're a
+// smell that the export data format is not yet quite right.
+
+// PeekPkgPath returns the package path for the specified package
+// index.
+func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
+ var path string
+ {
+ r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
+ path = r.String()
+ pr.RetireDecoder(&r)
+ }
+ if path == "" {
+ path = pr.pkgPath
+ }
+ return path
+}
+
+// PeekObj returns the package path, object name, and CodeObj for the
+// specified object index.
+func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
+ var ridx Index
+ var name string
+ var rcode int
+ {
+ r := pr.TempDecoder(RelocName, idx, SyncObject1)
+ r.Sync(SyncSym)
+ r.Sync(SyncPkg)
+ ridx = r.Reloc(RelocPkg)
+ name = r.String()
+ rcode = r.Code(SyncCodeObj)
+ pr.RetireDecoder(&r)
+ }
+
+ path := pr.PeekPkgPath(ridx)
+ assert(name != "")
+
+ tag := CodeObj(rcode)
+
+ return path, name, tag
+}
diff --git a/src/internal/pkgbits/doc.go b/src/internal/pkgbits/doc.go
new file mode 100644
index 0000000..4862e39
--- /dev/null
+++ b/src/internal/pkgbits/doc.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package pkgbits implements low-level coding abstractions for
+// Unified IR's export data format.
+//
+// At a low-level, a package is a collection of bitstream elements.
+// Each element has a "kind" and a dense, non-negative index.
+// Elements can be randomly accessed given their kind and index.
+//
+// Individual elements are sequences of variable-length values (e.g.,
+// integers, booleans, strings, go/constant values, cross-references
+// to other elements). Package pkgbits provides APIs for encoding and
+// decoding these low-level values, but the details of mapping
+// higher-level Go constructs into elements is left to higher-level
+// abstractions.
+//
+// Elements may cross-reference each other with "relocations." For
+// example, an element representing a pointer type has a relocation
+// referring to the element type.
+//
+// Go constructs may be composed as a constellation of multiple
+// elements. For example, a declared function may have one element to
+// describe the object (e.g., its name, type, position), and a
+// separate element to describe its function body. This allows readers
+// some flexibility in efficiently seeking or re-reading data (e.g.,
+// inlining requires re-reading the function body for each inlined
+// call, without needing to re-read the object-level details).
+package pkgbits
diff --git a/src/internal/pkgbits/encoder.go b/src/internal/pkgbits/encoder.go
new file mode 100644
index 0000000..70a2cba
--- /dev/null
+++ b/src/internal/pkgbits/encoder.go
@@ -0,0 +1,394 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+import (
+ "bytes"
+ "crypto/md5"
+ "encoding/binary"
+ "go/constant"
+ "io"
+ "math/big"
+ "runtime"
+ "strings"
+)
+
+// currentVersion is the current version number.
+//
+// - v0: initial prototype
+//
+// - v1: adds the flags uint32 word
+//
+// TODO(mdempsky): For the next version bump:
+// - remove the legacy "has init" bool from the public root
+// - remove obj's "derived func instance" bool
+const currentVersion uint32 = 1
+
+// A PkgEncoder provides methods for encoding a package's Unified IR
+// export data.
+type PkgEncoder struct {
+ // elems holds the bitstream for previously encoded elements.
+ elems [numRelocs][]string
+
+ // stringsIdx maps previously encoded strings to their index within
+ // the RelocString section, to allow deduplication. That is,
+ // elems[RelocString][stringsIdx[s]] == s (if present).
+ stringsIdx map[string]Index
+
+ // syncFrames is the number of frames to write at each sync
+ // marker. A negative value means sync markers are omitted.
+ syncFrames int
+}
+
+// SyncMarkers reports whether pw uses sync markers.
+func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
+
+// NewPkgEncoder returns an initialized PkgEncoder.
+//
+// syncFrames is the number of caller frames that should be serialized
+// at Sync points. Serializing additional frames results in larger
+// export data files, but can help diagnosing desync errors in
+// higher-level Unified IR reader/writer code. If syncFrames is
+// negative, then sync markers are omitted entirely.
+func NewPkgEncoder(syncFrames int) PkgEncoder {
+ return PkgEncoder{
+ stringsIdx: make(map[string]Index),
+ syncFrames: syncFrames,
+ }
+}
+
+// DumpTo writes the package's encoded data to out0 and returns the
+// package fingerprint.
+func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
+ h := md5.New()
+ out := io.MultiWriter(out0, h)
+
+ writeUint32 := func(x uint32) {
+ assert(binary.Write(out, binary.LittleEndian, x) == nil)
+ }
+
+ writeUint32(currentVersion)
+
+ var flags uint32
+ if pw.SyncMarkers() {
+ flags |= flagSyncMarkers
+ }
+ writeUint32(flags)
+
+ // Write elemEndsEnds.
+ var sum uint32
+ for _, elems := range &pw.elems {
+ sum += uint32(len(elems))
+ writeUint32(sum)
+ }
+
+ // Write elemEnds.
+ sum = 0
+ for _, elems := range &pw.elems {
+ for _, elem := range elems {
+ sum += uint32(len(elem))
+ writeUint32(sum)
+ }
+ }
+
+ // Write elemData.
+ for _, elems := range &pw.elems {
+ for _, elem := range elems {
+ _, err := io.WriteString(out, elem)
+ assert(err == nil)
+ }
+ }
+
+ // Write fingerprint.
+ copy(fingerprint[:], h.Sum(nil))
+ _, err := out0.Write(fingerprint[:])
+ assert(err == nil)
+
+ return
+}
+
+// StringIdx adds a string value to the strings section, if not
+// already present, and returns its index.
+func (pw *PkgEncoder) StringIdx(s string) Index {
+ if idx, ok := pw.stringsIdx[s]; ok {
+ assert(pw.elems[RelocString][idx] == s)
+ return idx
+ }
+
+ idx := Index(len(pw.elems[RelocString]))
+ pw.elems[RelocString] = append(pw.elems[RelocString], s)
+ pw.stringsIdx[s] = idx
+ return idx
+}
+
+// NewEncoder returns an Encoder for a new element within the given
+// section, and encodes the given SyncMarker as the start of the
+// element bitstream.
+func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
+ e := pw.NewEncoderRaw(k)
+ e.Sync(marker)
+ return e
+}
+
+// NewEncoderRaw returns an Encoder for a new element within the given
+// section.
+//
+// Most callers should use NewEncoder instead.
+func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
+ idx := Index(len(pw.elems[k]))
+ pw.elems[k] = append(pw.elems[k], "") // placeholder
+
+ return Encoder{
+ p: pw,
+ k: k,
+ Idx: idx,
+ }
+}
+
+// An Encoder provides methods for encoding an individual element's
+// bitstream data.
+type Encoder struct {
+ p *PkgEncoder
+
+ Relocs []RelocEnt
+ RelocMap map[RelocEnt]uint32
+ Data bytes.Buffer // accumulated element bitstream data
+
+ encodingRelocHeader bool
+
+ k RelocKind
+ Idx Index // index within relocation section
+}
+
+// Flush finalizes the element's bitstream and returns its Index.
+func (w *Encoder) Flush() Index {
+ var sb strings.Builder
+
+ // Backup the data so we write the relocations at the front.
+ var tmp bytes.Buffer
+ io.Copy(&tmp, &w.Data)
+
+ // TODO(mdempsky): Consider writing these out separately so they're
+ // easier to strip, along with function bodies, so that we can prune
+ // down to just the data that's relevant to go/types.
+ if w.encodingRelocHeader {
+ panic("encodingRelocHeader already true; recursive flush?")
+ }
+ w.encodingRelocHeader = true
+ w.Sync(SyncRelocs)
+ w.Len(len(w.Relocs))
+ for _, rEnt := range w.Relocs {
+ w.Sync(SyncReloc)
+ w.Len(int(rEnt.Kind))
+ w.Len(int(rEnt.Idx))
+ }
+
+ io.Copy(&sb, &w.Data)
+ io.Copy(&sb, &tmp)
+ w.p.elems[w.k][w.Idx] = sb.String()
+
+ return w.Idx
+}
+
+func (w *Encoder) checkErr(err error) {
+ if err != nil {
+ errorf("unexpected encoding error: %v", err)
+ }
+}
+
+func (w *Encoder) rawUvarint(x uint64) {
+ var buf [binary.MaxVarintLen64]byte
+ n := binary.PutUvarint(buf[:], x)
+ _, err := w.Data.Write(buf[:n])
+ w.checkErr(err)
+}
+
+func (w *Encoder) rawVarint(x int64) {
+ // Zig-zag encode.
+ ux := uint64(x) << 1
+ if x < 0 {
+ ux = ^ux
+ }
+
+ w.rawUvarint(ux)
+}
+
+func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
+ e := RelocEnt{r, idx}
+ if w.RelocMap != nil {
+ if i, ok := w.RelocMap[e]; ok {
+ return int(i)
+ }
+ } else {
+ w.RelocMap = make(map[RelocEnt]uint32)
+ }
+
+ i := len(w.Relocs)
+ w.RelocMap[e] = uint32(i)
+ w.Relocs = append(w.Relocs, e)
+ return i
+}
+
+func (w *Encoder) Sync(m SyncMarker) {
+ if !w.p.SyncMarkers() {
+ return
+ }
+
+ // Writing out stack frame string references requires working
+ // relocations, but writing out the relocations themselves involves
+ // sync markers. To prevent infinite recursion, we simply trim the
+ // stack frame for sync markers within the relocation header.
+ var frames []string
+ if !w.encodingRelocHeader && w.p.syncFrames > 0 {
+ pcs := make([]uintptr, w.p.syncFrames)
+ n := runtime.Callers(2, pcs)
+ frames = fmtFrames(pcs[:n]...)
+ }
+
+ // TODO(mdempsky): Save space by writing out stack frames as a
+ // linked list so we can share common stack frames.
+ w.rawUvarint(uint64(m))
+ w.rawUvarint(uint64(len(frames)))
+ for _, frame := range frames {
+ w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
+ }
+}
+
+// Bool encodes and writes a bool value into the element bitstream,
+// and then returns the bool value.
+//
+// For simple, 2-alternative encodings, the idiomatic way to call Bool
+// is something like:
+//
+// if w.Bool(x != 0) {
+// // alternative #1
+// } else {
+// // alternative #2
+// }
+//
+// For multi-alternative encodings, use Code instead.
+func (w *Encoder) Bool(b bool) bool {
+ w.Sync(SyncBool)
+ var x byte
+ if b {
+ x = 1
+ }
+ err := w.Data.WriteByte(x)
+ w.checkErr(err)
+ return b
+}
+
+// Int64 encodes and writes an int64 value into the element bitstream.
+func (w *Encoder) Int64(x int64) {
+ w.Sync(SyncInt64)
+ w.rawVarint(x)
+}
+
+// Uint64 encodes and writes a uint64 value into the element bitstream.
+func (w *Encoder) Uint64(x uint64) {
+ w.Sync(SyncUint64)
+ w.rawUvarint(x)
+}
+
+// Len encodes and writes a non-negative int value into the element bitstream.
+func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
+
+// Int encodes and writes an int value into the element bitstream.
+func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
+
+// Len encodes and writes a uint value into the element bitstream.
+func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
+
+// Reloc encodes and writes a relocation for the given (section,
+// index) pair into the element bitstream.
+//
+// Note: Only the index is formally written into the element
+// bitstream, so bitstream decoders must know from context which
+// section an encoded relocation refers to.
+func (w *Encoder) Reloc(r RelocKind, idx Index) {
+ w.Sync(SyncUseReloc)
+ w.Len(w.rawReloc(r, idx))
+}
+
+// Code encodes and writes a Code value into the element bitstream.
+func (w *Encoder) Code(c Code) {
+ w.Sync(c.Marker())
+ w.Len(c.Value())
+}
+
+// String encodes and writes a string value into the element
+// bitstream.
+//
+// Internally, strings are deduplicated by adding them to the strings
+// section (if not already present), and then writing a relocation
+// into the element bitstream.
+func (w *Encoder) String(s string) {
+ w.StringRef(w.p.StringIdx(s))
+}
+
+// StringRef writes a reference to the given index, which must be a
+// previously encoded string value.
+func (w *Encoder) StringRef(idx Index) {
+ w.Sync(SyncString)
+ w.Reloc(RelocString, idx)
+}
+
+// Strings encodes and writes a variable-length slice of strings into
+// the element bitstream.
+func (w *Encoder) Strings(ss []string) {
+ w.Len(len(ss))
+ for _, s := range ss {
+ w.String(s)
+ }
+}
+
+// Value encodes and writes a constant.Value into the element
+// bitstream.
+func (w *Encoder) Value(val constant.Value) {
+ w.Sync(SyncValue)
+ if w.Bool(val.Kind() == constant.Complex) {
+ w.scalar(constant.Real(val))
+ w.scalar(constant.Imag(val))
+ } else {
+ w.scalar(val)
+ }
+}
+
+func (w *Encoder) scalar(val constant.Value) {
+ switch v := constant.Val(val).(type) {
+ default:
+ errorf("unhandled %v (%v)", val, val.Kind())
+ case bool:
+ w.Code(ValBool)
+ w.Bool(v)
+ case string:
+ w.Code(ValString)
+ w.String(v)
+ case int64:
+ w.Code(ValInt64)
+ w.Int64(v)
+ case *big.Int:
+ w.Code(ValBigInt)
+ w.bigInt(v)
+ case *big.Rat:
+ w.Code(ValBigRat)
+ w.bigInt(v.Num())
+ w.bigInt(v.Denom())
+ case *big.Float:
+ w.Code(ValBigFloat)
+ w.bigFloat(v)
+ }
+}
+
+func (w *Encoder) bigInt(v *big.Int) {
+ b := v.Bytes()
+ w.String(string(b)) // TODO: More efficient encoding.
+ w.Bool(v.Sign() < 0)
+}
+
+func (w *Encoder) bigFloat(v *big.Float) {
+ b := v.Append(nil, 'p', -1)
+ w.String(string(b)) // TODO: More efficient encoding.
+}
diff --git a/src/internal/pkgbits/flags.go b/src/internal/pkgbits/flags.go
new file mode 100644
index 0000000..6542227
--- /dev/null
+++ b/src/internal/pkgbits/flags.go
@@ -0,0 +1,9 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+const (
+ flagSyncMarkers = 1 << iota // file format contains sync markers
+)
diff --git a/src/internal/pkgbits/reloc.go b/src/internal/pkgbits/reloc.go
new file mode 100644
index 0000000..fcdfb97
--- /dev/null
+++ b/src/internal/pkgbits/reloc.go
@@ -0,0 +1,42 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+// A RelocKind indicates a particular section within a unified IR export.
+type RelocKind int32
+
+// An Index represents a bitstream element index within a particular
+// section.
+type Index int32
+
+// A relocEnt (relocation entry) is an entry in an element's local
+// reference table.
+//
+// TODO(mdempsky): Rename this too.
+type RelocEnt struct {
+ Kind RelocKind
+ Idx Index
+}
+
+// Reserved indices within the meta relocation section.
+const (
+ PublicRootIdx Index = 0
+ PrivateRootIdx Index = 1
+)
+
+const (
+ RelocString RelocKind = iota
+ RelocMeta
+ RelocPosBase
+ RelocPkg
+ RelocName
+ RelocType
+ RelocObj
+ RelocObjExt
+ RelocObjDict
+ RelocBody
+
+ numRelocs = iota
+)
diff --git a/src/internal/pkgbits/support.go b/src/internal/pkgbits/support.go
new file mode 100644
index 0000000..f7579df
--- /dev/null
+++ b/src/internal/pkgbits/support.go
@@ -0,0 +1,17 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+import "fmt"
+
+func assert(b bool) {
+ if !b {
+ panic("assertion failed")
+ }
+}
+
+func errorf(format string, args ...any) {
+ panic(fmt.Errorf(format, args...))
+}
diff --git a/src/internal/pkgbits/sync.go b/src/internal/pkgbits/sync.go
new file mode 100644
index 0000000..1520b73
--- /dev/null
+++ b/src/internal/pkgbits/sync.go
@@ -0,0 +1,136 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+import (
+ "fmt"
+ "runtime"
+ "strings"
+)
+
+// fmtFrames formats a backtrace for reporting reader/writer desyncs.
+func fmtFrames(pcs ...uintptr) []string {
+ res := make([]string, 0, len(pcs))
+ walkFrames(pcs, func(file string, line int, name string, offset uintptr) {
+ // Trim package from function name. It's just redundant noise.
+ name = strings.TrimPrefix(name, "cmd/compile/internal/noder.")
+
+ res = append(res, fmt.Sprintf("%s:%v: %s +0x%v", file, line, name, offset))
+ })
+ return res
+}
+
+type frameVisitor func(file string, line int, name string, offset uintptr)
+
+// walkFrames calls visit for each call frame represented by pcs.
+//
+// pcs should be a slice of PCs, as returned by runtime.Callers.
+func walkFrames(pcs []uintptr, visit frameVisitor) {
+ if len(pcs) == 0 {
+ return
+ }
+
+ frames := runtime.CallersFrames(pcs)
+ for {
+ frame, more := frames.Next()
+ visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry)
+ if !more {
+ return
+ }
+ }
+}
+
+// SyncMarker is an enum type that represents markers that may be
+// written to export data to ensure the reader and writer stay
+// synchronized.
+type SyncMarker int
+
+//go:generate stringer -type=SyncMarker -trimprefix=Sync
+
+const (
+ _ SyncMarker = iota
+
+ // Public markers (known to go/types importers).
+
+ // Low-level coding markers.
+ SyncEOF
+ SyncBool
+ SyncInt64
+ SyncUint64
+ SyncString
+ SyncValue
+ SyncVal
+ SyncRelocs
+ SyncReloc
+ SyncUseReloc
+
+ // Higher-level object and type markers.
+ SyncPublic
+ SyncPos
+ SyncPosBase
+ SyncObject
+ SyncObject1
+ SyncPkg
+ SyncPkgDef
+ SyncMethod
+ SyncType
+ SyncTypeIdx
+ SyncTypeParamNames
+ SyncSignature
+ SyncParams
+ SyncParam
+ SyncCodeObj
+ SyncSym
+ SyncLocalIdent
+ SyncSelector
+
+ // Private markers (only known to cmd/compile).
+ SyncPrivate
+
+ SyncFuncExt
+ SyncVarExt
+ SyncTypeExt
+ SyncPragma
+
+ SyncExprList
+ SyncExprs
+ SyncExpr
+ SyncExprType
+ SyncAssign
+ SyncOp
+ SyncFuncLit
+ SyncCompLit
+
+ SyncDecl
+ SyncFuncBody
+ SyncOpenScope
+ SyncCloseScope
+ SyncCloseAnotherScope
+ SyncDeclNames
+ SyncDeclName
+
+ SyncStmts
+ SyncBlockStmt
+ SyncIfStmt
+ SyncForStmt
+ SyncSwitchStmt
+ SyncRangeStmt
+ SyncCaseClause
+ SyncCommClause
+ SyncSelectStmt
+ SyncDecls
+ SyncLabeledStmt
+ SyncUseObjLocal
+ SyncAddLocal
+ SyncLinkname
+ SyncStmt1
+ SyncStmtsEnd
+ SyncLabel
+ SyncOptLabel
+
+ SyncMultiExpr
+ SyncRType
+ SyncConvRTTI
+)
diff --git a/src/internal/pkgbits/syncmarker_string.go b/src/internal/pkgbits/syncmarker_string.go
new file mode 100644
index 0000000..4a5b0ca
--- /dev/null
+++ b/src/internal/pkgbits/syncmarker_string.go
@@ -0,0 +1,89 @@
+// Code generated by "stringer -type=SyncMarker -trimprefix=Sync"; DO NOT EDIT.
+
+package pkgbits
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[SyncEOF-1]
+ _ = x[SyncBool-2]
+ _ = x[SyncInt64-3]
+ _ = x[SyncUint64-4]
+ _ = x[SyncString-5]
+ _ = x[SyncValue-6]
+ _ = x[SyncVal-7]
+ _ = x[SyncRelocs-8]
+ _ = x[SyncReloc-9]
+ _ = x[SyncUseReloc-10]
+ _ = x[SyncPublic-11]
+ _ = x[SyncPos-12]
+ _ = x[SyncPosBase-13]
+ _ = x[SyncObject-14]
+ _ = x[SyncObject1-15]
+ _ = x[SyncPkg-16]
+ _ = x[SyncPkgDef-17]
+ _ = x[SyncMethod-18]
+ _ = x[SyncType-19]
+ _ = x[SyncTypeIdx-20]
+ _ = x[SyncTypeParamNames-21]
+ _ = x[SyncSignature-22]
+ _ = x[SyncParams-23]
+ _ = x[SyncParam-24]
+ _ = x[SyncCodeObj-25]
+ _ = x[SyncSym-26]
+ _ = x[SyncLocalIdent-27]
+ _ = x[SyncSelector-28]
+ _ = x[SyncPrivate-29]
+ _ = x[SyncFuncExt-30]
+ _ = x[SyncVarExt-31]
+ _ = x[SyncTypeExt-32]
+ _ = x[SyncPragma-33]
+ _ = x[SyncExprList-34]
+ _ = x[SyncExprs-35]
+ _ = x[SyncExpr-36]
+ _ = x[SyncExprType-37]
+ _ = x[SyncAssign-38]
+ _ = x[SyncOp-39]
+ _ = x[SyncFuncLit-40]
+ _ = x[SyncCompLit-41]
+ _ = x[SyncDecl-42]
+ _ = x[SyncFuncBody-43]
+ _ = x[SyncOpenScope-44]
+ _ = x[SyncCloseScope-45]
+ _ = x[SyncCloseAnotherScope-46]
+ _ = x[SyncDeclNames-47]
+ _ = x[SyncDeclName-48]
+ _ = x[SyncStmts-49]
+ _ = x[SyncBlockStmt-50]
+ _ = x[SyncIfStmt-51]
+ _ = x[SyncForStmt-52]
+ _ = x[SyncSwitchStmt-53]
+ _ = x[SyncRangeStmt-54]
+ _ = x[SyncCaseClause-55]
+ _ = x[SyncCommClause-56]
+ _ = x[SyncSelectStmt-57]
+ _ = x[SyncDecls-58]
+ _ = x[SyncLabeledStmt-59]
+ _ = x[SyncUseObjLocal-60]
+ _ = x[SyncAddLocal-61]
+ _ = x[SyncLinkname-62]
+ _ = x[SyncStmt1-63]
+ _ = x[SyncStmtsEnd-64]
+ _ = x[SyncLabel-65]
+ _ = x[SyncOptLabel-66]
+}
+
+const _SyncMarker_name = "EOFBoolInt64Uint64StringValueValRelocsRelocUseRelocPublicPosPosBaseObjectObject1PkgPkgDefMethodTypeTypeIdxTypeParamNamesSignatureParamsParamCodeObjSymLocalIdentSelectorPrivateFuncExtVarExtTypeExtPragmaExprListExprsExprExprTypeAssignOpFuncLitCompLitDeclFuncBodyOpenScopeCloseScopeCloseAnotherScopeDeclNamesDeclNameStmtsBlockStmtIfStmtForStmtSwitchStmtRangeStmtCaseClauseCommClauseSelectStmtDeclsLabeledStmtUseObjLocalAddLocalLinknameStmt1StmtsEndLabelOptLabel"
+
+var _SyncMarker_index = [...]uint16{0, 3, 7, 12, 18, 24, 29, 32, 38, 43, 51, 57, 60, 67, 73, 80, 83, 89, 95, 99, 106, 120, 129, 135, 140, 147, 150, 160, 168, 175, 182, 188, 195, 201, 209, 214, 218, 226, 232, 234, 241, 248, 252, 260, 269, 279, 296, 305, 313, 318, 327, 333, 340, 350, 359, 369, 379, 389, 394, 405, 416, 424, 432, 437, 445, 450, 458}
+
+func (i SyncMarker) String() string {
+ i -= 1
+ if i < 0 || i >= SyncMarker(len(_SyncMarker_index)-1) {
+ return "SyncMarker(" + strconv.FormatInt(int64(i+1), 10) + ")"
+ }
+ return _SyncMarker_name[_SyncMarker_index[i]:_SyncMarker_index[i+1]]
+}