diff options
Diffstat (limited to 'src/internal/pkgbits')
-rw-r--r-- | src/internal/pkgbits/codes.go | 77 | ||||
-rw-r--r-- | src/internal/pkgbits/decoder.go | 515 | ||||
-rw-r--r-- | src/internal/pkgbits/doc.go | 30 | ||||
-rw-r--r-- | src/internal/pkgbits/encoder.go | 394 | ||||
-rw-r--r-- | src/internal/pkgbits/flags.go | 9 | ||||
-rw-r--r-- | src/internal/pkgbits/reloc.go | 42 | ||||
-rw-r--r-- | src/internal/pkgbits/support.go | 17 | ||||
-rw-r--r-- | src/internal/pkgbits/sync.go | 136 | ||||
-rw-r--r-- | src/internal/pkgbits/syncmarker_string.go | 89 |
9 files changed, 1309 insertions, 0 deletions
diff --git a/src/internal/pkgbits/codes.go b/src/internal/pkgbits/codes.go new file mode 100644 index 0000000..f0cabde --- /dev/null +++ b/src/internal/pkgbits/codes.go @@ -0,0 +1,77 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// A Code is an enum value that can be encoded into bitstreams. +// +// Code types are preferable for enum types, because they allow +// Decoder to detect desyncs. +type Code interface { + // Marker returns the SyncMarker for the Code's dynamic type. + Marker() SyncMarker + + // Value returns the Code's ordinal value. + Value() int +} + +// A CodeVal distinguishes among go/constant.Value encodings. +type CodeVal int + +func (c CodeVal) Marker() SyncMarker { return SyncVal } +func (c CodeVal) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + ValBool CodeVal = iota + ValString + ValInt64 + ValBigInt + ValBigRat + ValBigFloat +) + +// A CodeType distinguishes among go/types.Type encodings. +type CodeType int + +func (c CodeType) Marker() SyncMarker { return SyncType } +func (c CodeType) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + TypeBasic CodeType = iota + TypeNamed + TypePointer + TypeSlice + TypeArray + TypeChan + TypeMap + TypeSignature + TypeStruct + TypeInterface + TypeUnion + TypeTypeParam +) + +// A CodeObj distinguishes among go/types.Object encodings. +type CodeObj int + +func (c CodeObj) Marker() SyncMarker { return SyncCodeObj } +func (c CodeObj) Value() int { return int(c) } + +// Note: These values are public and cannot be changed without +// updating the go/types importers. + +const ( + ObjAlias CodeObj = iota + ObjConst + ObjType + ObjFunc + ObjVar + ObjStub +) diff --git a/src/internal/pkgbits/decoder.go b/src/internal/pkgbits/decoder.go new file mode 100644 index 0000000..4fe024d --- /dev/null +++ b/src/internal/pkgbits/decoder.go @@ -0,0 +1,515 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "encoding/binary" + "errors" + "fmt" + "go/constant" + "go/token" + "io" + "math/big" + "os" + "runtime" + "strings" +) + +// A PkgDecoder provides methods for decoding a package's Unified IR +// export data. +type PkgDecoder struct { + // version is the file format version. + version uint32 + + // sync indicates whether the file uses sync markers. + sync bool + + // pkgPath is the package path for the package to be decoded. + // + // TODO(mdempsky): Remove; unneeded since CL 391014. + pkgPath string + + // elemData is the full data payload of the encoded package. + // Elements are densely and contiguously packed together. + // + // The last 8 bytes of elemData are the package fingerprint. + elemData string + + // elemEnds stores the byte-offset end positions of element + // bitstreams within elemData. + // + // For example, element I's bitstream data starts at elemEnds[I-1] + // (or 0, if I==0) and ends at elemEnds[I]. + // + // Note: elemEnds is indexed by absolute indices, not + // section-relative indices. + elemEnds []uint32 + + // elemEndsEnds stores the index-offset end positions of relocation + // sections within elemEnds. + // + // For example, section K's end positions start at elemEndsEnds[K-1] + // (or 0, if K==0) and end at elemEndsEnds[K]. + elemEndsEnds [numRelocs]uint32 + + scratchRelocEnt []RelocEnt +} + +// PkgPath returns the package path for the package +// +// TODO(mdempsky): Remove; unneeded since CL 391014. +func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath } + +// SyncMarkers reports whether pr uses sync markers. +func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } + +// NewPkgDecoder returns a PkgDecoder initialized to read the Unified +// IR export data from input. pkgPath is the package path for the +// compilation unit that produced the export data. +// +// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. +func NewPkgDecoder(pkgPath, input string) PkgDecoder { + pr := PkgDecoder{ + pkgPath: pkgPath, + } + + // TODO(mdempsky): Implement direct indexing of input string to + // avoid copying the position information. + + r := strings.NewReader(input) + + assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) + + switch pr.version { + default: + panic(fmt.Errorf("unsupported version: %v", pr.version)) + case 0: + // no flags + case 1: + var flags uint32 + assert(binary.Read(r, binary.LittleEndian, &flags) == nil) + pr.sync = flags&flagSyncMarkers != 0 + } + + assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil) + + pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1]) + assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil) + + pos, err := r.Seek(0, io.SeekCurrent) + assert(err == nil) + + pr.elemData = input[pos:] + assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) + + return pr +} + +// NumElems returns the number of elements in section k. +func (pr *PkgDecoder) NumElems(k RelocKind) int { + count := int(pr.elemEndsEnds[k]) + if k > 0 { + count -= int(pr.elemEndsEnds[k-1]) + } + return count +} + +// TotalElems returns the total number of elements across all sections. +func (pr *PkgDecoder) TotalElems() int { + return len(pr.elemEnds) +} + +// Fingerprint returns the package fingerprint. +func (pr *PkgDecoder) Fingerprint() [8]byte { + var fp [8]byte + copy(fp[:], pr.elemData[len(pr.elemData)-8:]) + return fp +} + +// AbsIdx returns the absolute index for the given (section, index) +// pair. +func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { + absIdx := int(idx) + if k > 0 { + absIdx += int(pr.elemEndsEnds[k-1]) + } + if absIdx >= int(pr.elemEndsEnds[k]) { + errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) + } + return absIdx +} + +// DataIdx returns the raw element bitstream for the given (section, +// index) pair. +func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string { + absIdx := pr.AbsIdx(k, idx) + + var start uint32 + if absIdx > 0 { + start = pr.elemEnds[absIdx-1] + } + end := pr.elemEnds[absIdx] + + return pr.elemData[start:end] +} + +// StringIdx returns the string value for the given string index. +func (pr *PkgDecoder) StringIdx(idx Index) string { + return pr.DataIdx(RelocString, idx) +} + +// NewDecoder returns a Decoder for the given (section, index) pair, +// and decodes the given SyncMarker from the element bitstream. +func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { + r := pr.NewDecoderRaw(k, idx) + r.Sync(marker) + return r +} + +// TempDecoder returns a Decoder for the given (section, index) pair, +// and decodes the given SyncMarker from the element bitstream. +// If possible the Decoder should be RetireDecoder'd when it is no longer +// needed, this will avoid heap allocations. +func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder { + r := pr.TempDecoderRaw(k, idx) + r.Sync(marker) + return r +} + +func (pr *PkgDecoder) RetireDecoder(d *Decoder) { + pr.scratchRelocEnt = d.Relocs + d.Relocs = nil +} + +// NewDecoderRaw returns a Decoder for the given (section, index) pair. +// +// Most callers should use NewDecoder instead. +func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { + r := Decoder{ + common: pr, + k: k, + Idx: idx, + } + + r.Data.Reset(pr.DataIdx(k, idx)) + r.Sync(SyncRelocs) + r.Relocs = make([]RelocEnt, r.Len()) + for i := range r.Relocs { + r.Sync(SyncReloc) + r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} + } + + return r +} + +func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder { + r := Decoder{ + common: pr, + k: k, + Idx: idx, + } + + r.Data.Reset(pr.DataIdx(k, idx)) + r.Sync(SyncRelocs) + l := r.Len() + if cap(pr.scratchRelocEnt) >= l { + r.Relocs = pr.scratchRelocEnt[:l] + pr.scratchRelocEnt = nil + } else { + r.Relocs = make([]RelocEnt, l) + } + for i := range r.Relocs { + r.Sync(SyncReloc) + r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())} + } + + return r +} + +// A Decoder provides methods for decoding an individual element's +// bitstream data. +type Decoder struct { + common *PkgDecoder + + Relocs []RelocEnt + Data strings.Reader + + k RelocKind + Idx Index +} + +func (r *Decoder) checkErr(err error) { + if err != nil { + errorf("unexpected decoding error: %w", err) + } +} + +func (r *Decoder) rawUvarint() uint64 { + x, err := readUvarint(&r.Data) + r.checkErr(err) + return x +} + +// readUvarint is a type-specialized copy of encoding/binary.ReadUvarint. +// This avoids the interface conversion and thus has better escape properties, +// which flows up the stack. +func readUvarint(r *strings.Reader) (uint64, error) { + var x uint64 + var s uint + for i := 0; i < binary.MaxVarintLen64; i++ { + b, err := r.ReadByte() + if err != nil { + if i > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return x, err + } + if b < 0x80 { + if i == binary.MaxVarintLen64-1 && b > 1 { + return x, overflow + } + return x | uint64(b)<<s, nil + } + x |= uint64(b&0x7f) << s + s += 7 + } + return x, overflow +} + +var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer") + +func (r *Decoder) rawVarint() int64 { + ux := r.rawUvarint() + + // Zig-zag decode. + x := int64(ux >> 1) + if ux&1 != 0 { + x = ^x + } + return x +} + +func (r *Decoder) rawReloc(k RelocKind, idx int) Index { + e := r.Relocs[idx] + assert(e.Kind == k) + return e.Idx +} + +// Sync decodes a sync marker from the element bitstream and asserts +// that it matches the expected marker. +// +// If EnableSync is false, then Sync is a no-op. +func (r *Decoder) Sync(mWant SyncMarker) { + if !r.common.sync { + return + } + + pos, _ := r.Data.Seek(0, io.SeekCurrent) + mHave := SyncMarker(r.rawUvarint()) + writerPCs := make([]int, r.rawUvarint()) + for i := range writerPCs { + writerPCs[i] = int(r.rawUvarint()) + } + + if mHave == mWant { + return + } + + // There's some tension here between printing: + // + // (1) full file paths that tools can recognize (e.g., so emacs + // hyperlinks the "file:line" text for easy navigation), or + // + // (2) short file paths that are easier for humans to read (e.g., by + // omitting redundant or irrelevant details, so it's easier to + // focus on the useful bits that remain). + // + // The current formatting favors the former, as it seems more + // helpful in practice. But perhaps the formatting could be improved + // to better address both concerns. For example, use relative file + // paths if they would be shorter, or rewrite file paths to contain + // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how + // to reliably expand that again. + + fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos) + + fmt.Printf("\nfound %v, written at:\n", mHave) + if len(writerPCs) == 0 { + fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath) + } + for _, pc := range writerPCs { + fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc))) + } + + fmt.Printf("\nexpected %v, reading at:\n", mWant) + var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size? + n := runtime.Callers(2, readerPCs[:]) + for _, pc := range fmtFrames(readerPCs[:n]...) { + fmt.Printf("\t%s\n", pc) + } + + // We already printed a stack trace for the reader, so now we can + // simply exit. Printing a second one with panic or base.Fatalf + // would just be noise. + os.Exit(1) +} + +// Bool decodes and returns a bool value from the element bitstream. +func (r *Decoder) Bool() bool { + r.Sync(SyncBool) + x, err := r.Data.ReadByte() + r.checkErr(err) + assert(x < 2) + return x != 0 +} + +// Int64 decodes and returns an int64 value from the element bitstream. +func (r *Decoder) Int64() int64 { + r.Sync(SyncInt64) + return r.rawVarint() +} + +// Int64 decodes and returns a uint64 value from the element bitstream. +func (r *Decoder) Uint64() uint64 { + r.Sync(SyncUint64) + return r.rawUvarint() +} + +// Len decodes and returns a non-negative int value from the element bitstream. +func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v } + +// Int decodes and returns an int value from the element bitstream. +func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v } + +// Uint decodes and returns a uint value from the element bitstream. +func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v } + +// Code decodes a Code value from the element bitstream and returns +// its ordinal value. It's the caller's responsibility to convert the +// result to an appropriate Code type. +// +// TODO(mdempsky): Ideally this method would have signature "Code[T +// Code] T" instead, but we don't allow generic methods and the +// compiler can't depend on generics yet anyway. +func (r *Decoder) Code(mark SyncMarker) int { + r.Sync(mark) + return r.Len() +} + +// Reloc decodes a relocation of expected section k from the element +// bitstream and returns an index to the referenced element. +func (r *Decoder) Reloc(k RelocKind) Index { + r.Sync(SyncUseReloc) + return r.rawReloc(k, r.Len()) +} + +// String decodes and returns a string value from the element +// bitstream. +func (r *Decoder) String() string { + r.Sync(SyncString) + return r.common.StringIdx(r.Reloc(RelocString)) +} + +// Strings decodes and returns a variable-length slice of strings from +// the element bitstream. +func (r *Decoder) Strings() []string { + res := make([]string, r.Len()) + for i := range res { + res[i] = r.String() + } + return res +} + +// Value decodes and returns a constant.Value from the element +// bitstream. +func (r *Decoder) Value() constant.Value { + r.Sync(SyncValue) + isComplex := r.Bool() + val := r.scalar() + if isComplex { + val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar())) + } + return val +} + +func (r *Decoder) scalar() constant.Value { + switch tag := CodeVal(r.Code(SyncVal)); tag { + default: + panic(fmt.Errorf("unexpected scalar tag: %v", tag)) + + case ValBool: + return constant.MakeBool(r.Bool()) + case ValString: + return constant.MakeString(r.String()) + case ValInt64: + return constant.MakeInt64(r.Int64()) + case ValBigInt: + return constant.Make(r.bigInt()) + case ValBigRat: + num := r.bigInt() + denom := r.bigInt() + return constant.Make(new(big.Rat).SetFrac(num, denom)) + case ValBigFloat: + return constant.Make(r.bigFloat()) + } +} + +func (r *Decoder) bigInt() *big.Int { + v := new(big.Int).SetBytes([]byte(r.String())) + if r.Bool() { + v.Neg(v) + } + return v +} + +func (r *Decoder) bigFloat() *big.Float { + v := new(big.Float).SetPrec(512) + assert(v.UnmarshalText([]byte(r.String())) == nil) + return v +} + +// @@@ Helpers + +// TODO(mdempsky): These should probably be removed. I think they're a +// smell that the export data format is not yet quite right. + +// PeekPkgPath returns the package path for the specified package +// index. +func (pr *PkgDecoder) PeekPkgPath(idx Index) string { + var path string + { + r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef) + path = r.String() + pr.RetireDecoder(&r) + } + if path == "" { + path = pr.pkgPath + } + return path +} + +// PeekObj returns the package path, object name, and CodeObj for the +// specified object index. +func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { + var ridx Index + var name string + var rcode int + { + r := pr.TempDecoder(RelocName, idx, SyncObject1) + r.Sync(SyncSym) + r.Sync(SyncPkg) + ridx = r.Reloc(RelocPkg) + name = r.String() + rcode = r.Code(SyncCodeObj) + pr.RetireDecoder(&r) + } + + path := pr.PeekPkgPath(ridx) + assert(name != "") + + tag := CodeObj(rcode) + + return path, name, tag +} diff --git a/src/internal/pkgbits/doc.go b/src/internal/pkgbits/doc.go new file mode 100644 index 0000000..4862e39 --- /dev/null +++ b/src/internal/pkgbits/doc.go @@ -0,0 +1,30 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package pkgbits implements low-level coding abstractions for +// Unified IR's export data format. +// +// At a low-level, a package is a collection of bitstream elements. +// Each element has a "kind" and a dense, non-negative index. +// Elements can be randomly accessed given their kind and index. +// +// Individual elements are sequences of variable-length values (e.g., +// integers, booleans, strings, go/constant values, cross-references +// to other elements). Package pkgbits provides APIs for encoding and +// decoding these low-level values, but the details of mapping +// higher-level Go constructs into elements is left to higher-level +// abstractions. +// +// Elements may cross-reference each other with "relocations." For +// example, an element representing a pointer type has a relocation +// referring to the element type. +// +// Go constructs may be composed as a constellation of multiple +// elements. For example, a declared function may have one element to +// describe the object (e.g., its name, type, position), and a +// separate element to describe its function body. This allows readers +// some flexibility in efficiently seeking or re-reading data (e.g., +// inlining requires re-reading the function body for each inlined +// call, without needing to re-read the object-level details). +package pkgbits diff --git a/src/internal/pkgbits/encoder.go b/src/internal/pkgbits/encoder.go new file mode 100644 index 0000000..70a2cba --- /dev/null +++ b/src/internal/pkgbits/encoder.go @@ -0,0 +1,394 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "bytes" + "crypto/md5" + "encoding/binary" + "go/constant" + "io" + "math/big" + "runtime" + "strings" +) + +// currentVersion is the current version number. +// +// - v0: initial prototype +// +// - v1: adds the flags uint32 word +// +// TODO(mdempsky): For the next version bump: +// - remove the legacy "has init" bool from the public root +// - remove obj's "derived func instance" bool +const currentVersion uint32 = 1 + +// A PkgEncoder provides methods for encoding a package's Unified IR +// export data. +type PkgEncoder struct { + // elems holds the bitstream for previously encoded elements. + elems [numRelocs][]string + + // stringsIdx maps previously encoded strings to their index within + // the RelocString section, to allow deduplication. That is, + // elems[RelocString][stringsIdx[s]] == s (if present). + stringsIdx map[string]Index + + // syncFrames is the number of frames to write at each sync + // marker. A negative value means sync markers are omitted. + syncFrames int +} + +// SyncMarkers reports whether pw uses sync markers. +func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } + +// NewPkgEncoder returns an initialized PkgEncoder. +// +// syncFrames is the number of caller frames that should be serialized +// at Sync points. Serializing additional frames results in larger +// export data files, but can help diagnosing desync errors in +// higher-level Unified IR reader/writer code. If syncFrames is +// negative, then sync markers are omitted entirely. +func NewPkgEncoder(syncFrames int) PkgEncoder { + return PkgEncoder{ + stringsIdx: make(map[string]Index), + syncFrames: syncFrames, + } +} + +// DumpTo writes the package's encoded data to out0 and returns the +// package fingerprint. +func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { + h := md5.New() + out := io.MultiWriter(out0, h) + + writeUint32 := func(x uint32) { + assert(binary.Write(out, binary.LittleEndian, x) == nil) + } + + writeUint32(currentVersion) + + var flags uint32 + if pw.SyncMarkers() { + flags |= flagSyncMarkers + } + writeUint32(flags) + + // Write elemEndsEnds. + var sum uint32 + for _, elems := range &pw.elems { + sum += uint32(len(elems)) + writeUint32(sum) + } + + // Write elemEnds. + sum = 0 + for _, elems := range &pw.elems { + for _, elem := range elems { + sum += uint32(len(elem)) + writeUint32(sum) + } + } + + // Write elemData. + for _, elems := range &pw.elems { + for _, elem := range elems { + _, err := io.WriteString(out, elem) + assert(err == nil) + } + } + + // Write fingerprint. + copy(fingerprint[:], h.Sum(nil)) + _, err := out0.Write(fingerprint[:]) + assert(err == nil) + + return +} + +// StringIdx adds a string value to the strings section, if not +// already present, and returns its index. +func (pw *PkgEncoder) StringIdx(s string) Index { + if idx, ok := pw.stringsIdx[s]; ok { + assert(pw.elems[RelocString][idx] == s) + return idx + } + + idx := Index(len(pw.elems[RelocString])) + pw.elems[RelocString] = append(pw.elems[RelocString], s) + pw.stringsIdx[s] = idx + return idx +} + +// NewEncoder returns an Encoder for a new element within the given +// section, and encodes the given SyncMarker as the start of the +// element bitstream. +func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { + e := pw.NewEncoderRaw(k) + e.Sync(marker) + return e +} + +// NewEncoderRaw returns an Encoder for a new element within the given +// section. +// +// Most callers should use NewEncoder instead. +func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { + idx := Index(len(pw.elems[k])) + pw.elems[k] = append(pw.elems[k], "") // placeholder + + return Encoder{ + p: pw, + k: k, + Idx: idx, + } +} + +// An Encoder provides methods for encoding an individual element's +// bitstream data. +type Encoder struct { + p *PkgEncoder + + Relocs []RelocEnt + RelocMap map[RelocEnt]uint32 + Data bytes.Buffer // accumulated element bitstream data + + encodingRelocHeader bool + + k RelocKind + Idx Index // index within relocation section +} + +// Flush finalizes the element's bitstream and returns its Index. +func (w *Encoder) Flush() Index { + var sb strings.Builder + + // Backup the data so we write the relocations at the front. + var tmp bytes.Buffer + io.Copy(&tmp, &w.Data) + + // TODO(mdempsky): Consider writing these out separately so they're + // easier to strip, along with function bodies, so that we can prune + // down to just the data that's relevant to go/types. + if w.encodingRelocHeader { + panic("encodingRelocHeader already true; recursive flush?") + } + w.encodingRelocHeader = true + w.Sync(SyncRelocs) + w.Len(len(w.Relocs)) + for _, rEnt := range w.Relocs { + w.Sync(SyncReloc) + w.Len(int(rEnt.Kind)) + w.Len(int(rEnt.Idx)) + } + + io.Copy(&sb, &w.Data) + io.Copy(&sb, &tmp) + w.p.elems[w.k][w.Idx] = sb.String() + + return w.Idx +} + +func (w *Encoder) checkErr(err error) { + if err != nil { + errorf("unexpected encoding error: %v", err) + } +} + +func (w *Encoder) rawUvarint(x uint64) { + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], x) + _, err := w.Data.Write(buf[:n]) + w.checkErr(err) +} + +func (w *Encoder) rawVarint(x int64) { + // Zig-zag encode. + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + + w.rawUvarint(ux) +} + +func (w *Encoder) rawReloc(r RelocKind, idx Index) int { + e := RelocEnt{r, idx} + if w.RelocMap != nil { + if i, ok := w.RelocMap[e]; ok { + return int(i) + } + } else { + w.RelocMap = make(map[RelocEnt]uint32) + } + + i := len(w.Relocs) + w.RelocMap[e] = uint32(i) + w.Relocs = append(w.Relocs, e) + return i +} + +func (w *Encoder) Sync(m SyncMarker) { + if !w.p.SyncMarkers() { + return + } + + // Writing out stack frame string references requires working + // relocations, but writing out the relocations themselves involves + // sync markers. To prevent infinite recursion, we simply trim the + // stack frame for sync markers within the relocation header. + var frames []string + if !w.encodingRelocHeader && w.p.syncFrames > 0 { + pcs := make([]uintptr, w.p.syncFrames) + n := runtime.Callers(2, pcs) + frames = fmtFrames(pcs[:n]...) + } + + // TODO(mdempsky): Save space by writing out stack frames as a + // linked list so we can share common stack frames. + w.rawUvarint(uint64(m)) + w.rawUvarint(uint64(len(frames))) + for _, frame := range frames { + w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) + } +} + +// Bool encodes and writes a bool value into the element bitstream, +// and then returns the bool value. +// +// For simple, 2-alternative encodings, the idiomatic way to call Bool +// is something like: +// +// if w.Bool(x != 0) { +// // alternative #1 +// } else { +// // alternative #2 +// } +// +// For multi-alternative encodings, use Code instead. +func (w *Encoder) Bool(b bool) bool { + w.Sync(SyncBool) + var x byte + if b { + x = 1 + } + err := w.Data.WriteByte(x) + w.checkErr(err) + return b +} + +// Int64 encodes and writes an int64 value into the element bitstream. +func (w *Encoder) Int64(x int64) { + w.Sync(SyncInt64) + w.rawVarint(x) +} + +// Uint64 encodes and writes a uint64 value into the element bitstream. +func (w *Encoder) Uint64(x uint64) { + w.Sync(SyncUint64) + w.rawUvarint(x) +} + +// Len encodes and writes a non-negative int value into the element bitstream. +func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } + +// Int encodes and writes an int value into the element bitstream. +func (w *Encoder) Int(x int) { w.Int64(int64(x)) } + +// Len encodes and writes a uint value into the element bitstream. +func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } + +// Reloc encodes and writes a relocation for the given (section, +// index) pair into the element bitstream. +// +// Note: Only the index is formally written into the element +// bitstream, so bitstream decoders must know from context which +// section an encoded relocation refers to. +func (w *Encoder) Reloc(r RelocKind, idx Index) { + w.Sync(SyncUseReloc) + w.Len(w.rawReloc(r, idx)) +} + +// Code encodes and writes a Code value into the element bitstream. +func (w *Encoder) Code(c Code) { + w.Sync(c.Marker()) + w.Len(c.Value()) +} + +// String encodes and writes a string value into the element +// bitstream. +// +// Internally, strings are deduplicated by adding them to the strings +// section (if not already present), and then writing a relocation +// into the element bitstream. +func (w *Encoder) String(s string) { + w.StringRef(w.p.StringIdx(s)) +} + +// StringRef writes a reference to the given index, which must be a +// previously encoded string value. +func (w *Encoder) StringRef(idx Index) { + w.Sync(SyncString) + w.Reloc(RelocString, idx) +} + +// Strings encodes and writes a variable-length slice of strings into +// the element bitstream. +func (w *Encoder) Strings(ss []string) { + w.Len(len(ss)) + for _, s := range ss { + w.String(s) + } +} + +// Value encodes and writes a constant.Value into the element +// bitstream. +func (w *Encoder) Value(val constant.Value) { + w.Sync(SyncValue) + if w.Bool(val.Kind() == constant.Complex) { + w.scalar(constant.Real(val)) + w.scalar(constant.Imag(val)) + } else { + w.scalar(val) + } +} + +func (w *Encoder) scalar(val constant.Value) { + switch v := constant.Val(val).(type) { + default: + errorf("unhandled %v (%v)", val, val.Kind()) + case bool: + w.Code(ValBool) + w.Bool(v) + case string: + w.Code(ValString) + w.String(v) + case int64: + w.Code(ValInt64) + w.Int64(v) + case *big.Int: + w.Code(ValBigInt) + w.bigInt(v) + case *big.Rat: + w.Code(ValBigRat) + w.bigInt(v.Num()) + w.bigInt(v.Denom()) + case *big.Float: + w.Code(ValBigFloat) + w.bigFloat(v) + } +} + +func (w *Encoder) bigInt(v *big.Int) { + b := v.Bytes() + w.String(string(b)) // TODO: More efficient encoding. + w.Bool(v.Sign() < 0) +} + +func (w *Encoder) bigFloat(v *big.Float) { + b := v.Append(nil, 'p', -1) + w.String(string(b)) // TODO: More efficient encoding. +} diff --git a/src/internal/pkgbits/flags.go b/src/internal/pkgbits/flags.go new file mode 100644 index 0000000..6542227 --- /dev/null +++ b/src/internal/pkgbits/flags.go @@ -0,0 +1,9 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +const ( + flagSyncMarkers = 1 << iota // file format contains sync markers +) diff --git a/src/internal/pkgbits/reloc.go b/src/internal/pkgbits/reloc.go new file mode 100644 index 0000000..fcdfb97 --- /dev/null +++ b/src/internal/pkgbits/reloc.go @@ -0,0 +1,42 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// A RelocKind indicates a particular section within a unified IR export. +type RelocKind int32 + +// An Index represents a bitstream element index within a particular +// section. +type Index int32 + +// A relocEnt (relocation entry) is an entry in an element's local +// reference table. +// +// TODO(mdempsky): Rename this too. +type RelocEnt struct { + Kind RelocKind + Idx Index +} + +// Reserved indices within the meta relocation section. +const ( + PublicRootIdx Index = 0 + PrivateRootIdx Index = 1 +) + +const ( + RelocString RelocKind = iota + RelocMeta + RelocPosBase + RelocPkg + RelocName + RelocType + RelocObj + RelocObjExt + RelocObjDict + RelocBody + + numRelocs = iota +) diff --git a/src/internal/pkgbits/support.go b/src/internal/pkgbits/support.go new file mode 100644 index 0000000..f7579df --- /dev/null +++ b/src/internal/pkgbits/support.go @@ -0,0 +1,17 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import "fmt" + +func assert(b bool) { + if !b { + panic("assertion failed") + } +} + +func errorf(format string, args ...any) { + panic(fmt.Errorf(format, args...)) +} diff --git a/src/internal/pkgbits/sync.go b/src/internal/pkgbits/sync.go new file mode 100644 index 0000000..1520b73 --- /dev/null +++ b/src/internal/pkgbits/sync.go @@ -0,0 +1,136 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +import ( + "fmt" + "runtime" + "strings" +) + +// fmtFrames formats a backtrace for reporting reader/writer desyncs. +func fmtFrames(pcs ...uintptr) []string { + res := make([]string, 0, len(pcs)) + walkFrames(pcs, func(file string, line int, name string, offset uintptr) { + // Trim package from function name. It's just redundant noise. + name = strings.TrimPrefix(name, "cmd/compile/internal/noder.") + + res = append(res, fmt.Sprintf("%s:%v: %s +0x%v", file, line, name, offset)) + }) + return res +} + +type frameVisitor func(file string, line int, name string, offset uintptr) + +// walkFrames calls visit for each call frame represented by pcs. +// +// pcs should be a slice of PCs, as returned by runtime.Callers. +func walkFrames(pcs []uintptr, visit frameVisitor) { + if len(pcs) == 0 { + return + } + + frames := runtime.CallersFrames(pcs) + for { + frame, more := frames.Next() + visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry) + if !more { + return + } + } +} + +// SyncMarker is an enum type that represents markers that may be +// written to export data to ensure the reader and writer stay +// synchronized. +type SyncMarker int + +//go:generate stringer -type=SyncMarker -trimprefix=Sync + +const ( + _ SyncMarker = iota + + // Public markers (known to go/types importers). + + // Low-level coding markers. + SyncEOF + SyncBool + SyncInt64 + SyncUint64 + SyncString + SyncValue + SyncVal + SyncRelocs + SyncReloc + SyncUseReloc + + // Higher-level object and type markers. + SyncPublic + SyncPos + SyncPosBase + SyncObject + SyncObject1 + SyncPkg + SyncPkgDef + SyncMethod + SyncType + SyncTypeIdx + SyncTypeParamNames + SyncSignature + SyncParams + SyncParam + SyncCodeObj + SyncSym + SyncLocalIdent + SyncSelector + + // Private markers (only known to cmd/compile). + SyncPrivate + + SyncFuncExt + SyncVarExt + SyncTypeExt + SyncPragma + + SyncExprList + SyncExprs + SyncExpr + SyncExprType + SyncAssign + SyncOp + SyncFuncLit + SyncCompLit + + SyncDecl + SyncFuncBody + SyncOpenScope + SyncCloseScope + SyncCloseAnotherScope + SyncDeclNames + SyncDeclName + + SyncStmts + SyncBlockStmt + SyncIfStmt + SyncForStmt + SyncSwitchStmt + SyncRangeStmt + SyncCaseClause + SyncCommClause + SyncSelectStmt + SyncDecls + SyncLabeledStmt + SyncUseObjLocal + SyncAddLocal + SyncLinkname + SyncStmt1 + SyncStmtsEnd + SyncLabel + SyncOptLabel + + SyncMultiExpr + SyncRType + SyncConvRTTI +) diff --git a/src/internal/pkgbits/syncmarker_string.go b/src/internal/pkgbits/syncmarker_string.go new file mode 100644 index 0000000..4a5b0ca --- /dev/null +++ b/src/internal/pkgbits/syncmarker_string.go @@ -0,0 +1,89 @@ +// Code generated by "stringer -type=SyncMarker -trimprefix=Sync"; DO NOT EDIT. + +package pkgbits + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[SyncEOF-1] + _ = x[SyncBool-2] + _ = x[SyncInt64-3] + _ = x[SyncUint64-4] + _ = x[SyncString-5] + _ = x[SyncValue-6] + _ = x[SyncVal-7] + _ = x[SyncRelocs-8] + _ = x[SyncReloc-9] + _ = x[SyncUseReloc-10] + _ = x[SyncPublic-11] + _ = x[SyncPos-12] + _ = x[SyncPosBase-13] + _ = x[SyncObject-14] + _ = x[SyncObject1-15] + _ = x[SyncPkg-16] + _ = x[SyncPkgDef-17] + _ = x[SyncMethod-18] + _ = x[SyncType-19] + _ = x[SyncTypeIdx-20] + _ = x[SyncTypeParamNames-21] + _ = x[SyncSignature-22] + _ = x[SyncParams-23] + _ = x[SyncParam-24] + _ = x[SyncCodeObj-25] + _ = x[SyncSym-26] + _ = x[SyncLocalIdent-27] + _ = x[SyncSelector-28] + _ = x[SyncPrivate-29] + _ = x[SyncFuncExt-30] + _ = x[SyncVarExt-31] + _ = x[SyncTypeExt-32] + _ = x[SyncPragma-33] + _ = x[SyncExprList-34] + _ = x[SyncExprs-35] + _ = x[SyncExpr-36] + _ = x[SyncExprType-37] + _ = x[SyncAssign-38] + _ = x[SyncOp-39] + _ = x[SyncFuncLit-40] + _ = x[SyncCompLit-41] + _ = x[SyncDecl-42] + _ = x[SyncFuncBody-43] + _ = x[SyncOpenScope-44] + _ = x[SyncCloseScope-45] + _ = x[SyncCloseAnotherScope-46] + _ = x[SyncDeclNames-47] + _ = x[SyncDeclName-48] + _ = x[SyncStmts-49] + _ = x[SyncBlockStmt-50] + _ = x[SyncIfStmt-51] + _ = x[SyncForStmt-52] + _ = x[SyncSwitchStmt-53] + _ = x[SyncRangeStmt-54] + _ = x[SyncCaseClause-55] + _ = x[SyncCommClause-56] + _ = x[SyncSelectStmt-57] + _ = x[SyncDecls-58] + _ = x[SyncLabeledStmt-59] + _ = x[SyncUseObjLocal-60] + _ = x[SyncAddLocal-61] + _ = x[SyncLinkname-62] + _ = x[SyncStmt1-63] + _ = x[SyncStmtsEnd-64] + _ = x[SyncLabel-65] + _ = x[SyncOptLabel-66] +} + +const _SyncMarker_name = "EOFBoolInt64Uint64StringValueValRelocsRelocUseRelocPublicPosPosBaseObjectObject1PkgPkgDefMethodTypeTypeIdxTypeParamNamesSignatureParamsParamCodeObjSymLocalIdentSelectorPrivateFuncExtVarExtTypeExtPragmaExprListExprsExprExprTypeAssignOpFuncLitCompLitDeclFuncBodyOpenScopeCloseScopeCloseAnotherScopeDeclNamesDeclNameStmtsBlockStmtIfStmtForStmtSwitchStmtRangeStmtCaseClauseCommClauseSelectStmtDeclsLabeledStmtUseObjLocalAddLocalLinknameStmt1StmtsEndLabelOptLabel" + +var _SyncMarker_index = [...]uint16{0, 3, 7, 12, 18, 24, 29, 32, 38, 43, 51, 57, 60, 67, 73, 80, 83, 89, 95, 99, 106, 120, 129, 135, 140, 147, 150, 160, 168, 175, 182, 188, 195, 201, 209, 214, 218, 226, 232, 234, 241, 248, 252, 260, 269, 279, 296, 305, 313, 318, 327, 333, 340, 350, 359, 369, 379, 389, 394, 405, 416, 424, 432, 437, 445, 450, 458} + +func (i SyncMarker) String() string { + i -= 1 + if i < 0 || i >= SyncMarker(len(_SyncMarker_index)-1) { + return "SyncMarker(" + strconv.FormatInt(int64(i+1), 10) + ")" + } + return _SyncMarker_name[_SyncMarker_index[i]:_SyncMarker_index[i+1]] +} |