summaryrefslogtreecommitdiffstats
path: root/src/internal/pkgbits/encoder.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:23:18 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:23:18 +0000
commit43a123c1ae6613b3efeed291fa552ecd909d3acf (patch)
treefd92518b7024bc74031f78a1cf9e454b65e73665 /src/internal/pkgbits/encoder.go
parentInitial commit. (diff)
downloadgolang-1.20-upstream.tar.xz
golang-1.20-upstream.zip
Adding upstream version 1.20.14.upstream/1.20.14upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/internal/pkgbits/encoder.go')
-rw-r--r--src/internal/pkgbits/encoder.go394
1 files changed, 394 insertions, 0 deletions
diff --git a/src/internal/pkgbits/encoder.go b/src/internal/pkgbits/encoder.go
new file mode 100644
index 0000000..70a2cba
--- /dev/null
+++ b/src/internal/pkgbits/encoder.go
@@ -0,0 +1,394 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pkgbits
+
+import (
+ "bytes"
+ "crypto/md5"
+ "encoding/binary"
+ "go/constant"
+ "io"
+ "math/big"
+ "runtime"
+ "strings"
+)
+
+// currentVersion is the current version number.
+//
+// - v0: initial prototype
+//
+// - v1: adds the flags uint32 word
+//
+// TODO(mdempsky): For the next version bump:
+// - remove the legacy "has init" bool from the public root
+// - remove obj's "derived func instance" bool
+const currentVersion uint32 = 1
+
+// A PkgEncoder provides methods for encoding a package's Unified IR
+// export data.
+type PkgEncoder struct {
+ // elems holds the bitstream for previously encoded elements.
+ elems [numRelocs][]string
+
+ // stringsIdx maps previously encoded strings to their index within
+ // the RelocString section, to allow deduplication. That is,
+ // elems[RelocString][stringsIdx[s]] == s (if present).
+ stringsIdx map[string]Index
+
+ // syncFrames is the number of frames to write at each sync
+ // marker. A negative value means sync markers are omitted.
+ syncFrames int
+}
+
+// SyncMarkers reports whether pw uses sync markers.
+func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
+
+// NewPkgEncoder returns an initialized PkgEncoder.
+//
+// syncFrames is the number of caller frames that should be serialized
+// at Sync points. Serializing additional frames results in larger
+// export data files, but can help diagnosing desync errors in
+// higher-level Unified IR reader/writer code. If syncFrames is
+// negative, then sync markers are omitted entirely.
+func NewPkgEncoder(syncFrames int) PkgEncoder {
+ return PkgEncoder{
+ stringsIdx: make(map[string]Index),
+ syncFrames: syncFrames,
+ }
+}
+
+// DumpTo writes the package's encoded data to out0 and returns the
+// package fingerprint.
+func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
+ h := md5.New()
+ out := io.MultiWriter(out0, h)
+
+ writeUint32 := func(x uint32) {
+ assert(binary.Write(out, binary.LittleEndian, x) == nil)
+ }
+
+ writeUint32(currentVersion)
+
+ var flags uint32
+ if pw.SyncMarkers() {
+ flags |= flagSyncMarkers
+ }
+ writeUint32(flags)
+
+ // Write elemEndsEnds.
+ var sum uint32
+ for _, elems := range &pw.elems {
+ sum += uint32(len(elems))
+ writeUint32(sum)
+ }
+
+ // Write elemEnds.
+ sum = 0
+ for _, elems := range &pw.elems {
+ for _, elem := range elems {
+ sum += uint32(len(elem))
+ writeUint32(sum)
+ }
+ }
+
+ // Write elemData.
+ for _, elems := range &pw.elems {
+ for _, elem := range elems {
+ _, err := io.WriteString(out, elem)
+ assert(err == nil)
+ }
+ }
+
+ // Write fingerprint.
+ copy(fingerprint[:], h.Sum(nil))
+ _, err := out0.Write(fingerprint[:])
+ assert(err == nil)
+
+ return
+}
+
+// StringIdx adds a string value to the strings section, if not
+// already present, and returns its index.
+func (pw *PkgEncoder) StringIdx(s string) Index {
+ if idx, ok := pw.stringsIdx[s]; ok {
+ assert(pw.elems[RelocString][idx] == s)
+ return idx
+ }
+
+ idx := Index(len(pw.elems[RelocString]))
+ pw.elems[RelocString] = append(pw.elems[RelocString], s)
+ pw.stringsIdx[s] = idx
+ return idx
+}
+
+// NewEncoder returns an Encoder for a new element within the given
+// section, and encodes the given SyncMarker as the start of the
+// element bitstream.
+func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
+ e := pw.NewEncoderRaw(k)
+ e.Sync(marker)
+ return e
+}
+
+// NewEncoderRaw returns an Encoder for a new element within the given
+// section.
+//
+// Most callers should use NewEncoder instead.
+func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
+ idx := Index(len(pw.elems[k]))
+ pw.elems[k] = append(pw.elems[k], "") // placeholder
+
+ return Encoder{
+ p: pw,
+ k: k,
+ Idx: idx,
+ }
+}
+
+// An Encoder provides methods for encoding an individual element's
+// bitstream data.
+type Encoder struct {
+ p *PkgEncoder
+
+ Relocs []RelocEnt
+ RelocMap map[RelocEnt]uint32
+ Data bytes.Buffer // accumulated element bitstream data
+
+ encodingRelocHeader bool
+
+ k RelocKind
+ Idx Index // index within relocation section
+}
+
+// Flush finalizes the element's bitstream and returns its Index.
+func (w *Encoder) Flush() Index {
+ var sb strings.Builder
+
+ // Backup the data so we write the relocations at the front.
+ var tmp bytes.Buffer
+ io.Copy(&tmp, &w.Data)
+
+ // TODO(mdempsky): Consider writing these out separately so they're
+ // easier to strip, along with function bodies, so that we can prune
+ // down to just the data that's relevant to go/types.
+ if w.encodingRelocHeader {
+ panic("encodingRelocHeader already true; recursive flush?")
+ }
+ w.encodingRelocHeader = true
+ w.Sync(SyncRelocs)
+ w.Len(len(w.Relocs))
+ for _, rEnt := range w.Relocs {
+ w.Sync(SyncReloc)
+ w.Len(int(rEnt.Kind))
+ w.Len(int(rEnt.Idx))
+ }
+
+ io.Copy(&sb, &w.Data)
+ io.Copy(&sb, &tmp)
+ w.p.elems[w.k][w.Idx] = sb.String()
+
+ return w.Idx
+}
+
+func (w *Encoder) checkErr(err error) {
+ if err != nil {
+ errorf("unexpected encoding error: %v", err)
+ }
+}
+
+func (w *Encoder) rawUvarint(x uint64) {
+ var buf [binary.MaxVarintLen64]byte
+ n := binary.PutUvarint(buf[:], x)
+ _, err := w.Data.Write(buf[:n])
+ w.checkErr(err)
+}
+
+func (w *Encoder) rawVarint(x int64) {
+ // Zig-zag encode.
+ ux := uint64(x) << 1
+ if x < 0 {
+ ux = ^ux
+ }
+
+ w.rawUvarint(ux)
+}
+
+func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
+ e := RelocEnt{r, idx}
+ if w.RelocMap != nil {
+ if i, ok := w.RelocMap[e]; ok {
+ return int(i)
+ }
+ } else {
+ w.RelocMap = make(map[RelocEnt]uint32)
+ }
+
+ i := len(w.Relocs)
+ w.RelocMap[e] = uint32(i)
+ w.Relocs = append(w.Relocs, e)
+ return i
+}
+
+func (w *Encoder) Sync(m SyncMarker) {
+ if !w.p.SyncMarkers() {
+ return
+ }
+
+ // Writing out stack frame string references requires working
+ // relocations, but writing out the relocations themselves involves
+ // sync markers. To prevent infinite recursion, we simply trim the
+ // stack frame for sync markers within the relocation header.
+ var frames []string
+ if !w.encodingRelocHeader && w.p.syncFrames > 0 {
+ pcs := make([]uintptr, w.p.syncFrames)
+ n := runtime.Callers(2, pcs)
+ frames = fmtFrames(pcs[:n]...)
+ }
+
+ // TODO(mdempsky): Save space by writing out stack frames as a
+ // linked list so we can share common stack frames.
+ w.rawUvarint(uint64(m))
+ w.rawUvarint(uint64(len(frames)))
+ for _, frame := range frames {
+ w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
+ }
+}
+
+// Bool encodes and writes a bool value into the element bitstream,
+// and then returns the bool value.
+//
+// For simple, 2-alternative encodings, the idiomatic way to call Bool
+// is something like:
+//
+// if w.Bool(x != 0) {
+// // alternative #1
+// } else {
+// // alternative #2
+// }
+//
+// For multi-alternative encodings, use Code instead.
+func (w *Encoder) Bool(b bool) bool {
+ w.Sync(SyncBool)
+ var x byte
+ if b {
+ x = 1
+ }
+ err := w.Data.WriteByte(x)
+ w.checkErr(err)
+ return b
+}
+
+// Int64 encodes and writes an int64 value into the element bitstream.
+func (w *Encoder) Int64(x int64) {
+ w.Sync(SyncInt64)
+ w.rawVarint(x)
+}
+
+// Uint64 encodes and writes a uint64 value into the element bitstream.
+func (w *Encoder) Uint64(x uint64) {
+ w.Sync(SyncUint64)
+ w.rawUvarint(x)
+}
+
+// Len encodes and writes a non-negative int value into the element bitstream.
+func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
+
+// Int encodes and writes an int value into the element bitstream.
+func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
+
+// Len encodes and writes a uint value into the element bitstream.
+func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
+
+// Reloc encodes and writes a relocation for the given (section,
+// index) pair into the element bitstream.
+//
+// Note: Only the index is formally written into the element
+// bitstream, so bitstream decoders must know from context which
+// section an encoded relocation refers to.
+func (w *Encoder) Reloc(r RelocKind, idx Index) {
+ w.Sync(SyncUseReloc)
+ w.Len(w.rawReloc(r, idx))
+}
+
+// Code encodes and writes a Code value into the element bitstream.
+func (w *Encoder) Code(c Code) {
+ w.Sync(c.Marker())
+ w.Len(c.Value())
+}
+
+// String encodes and writes a string value into the element
+// bitstream.
+//
+// Internally, strings are deduplicated by adding them to the strings
+// section (if not already present), and then writing a relocation
+// into the element bitstream.
+func (w *Encoder) String(s string) {
+ w.StringRef(w.p.StringIdx(s))
+}
+
+// StringRef writes a reference to the given index, which must be a
+// previously encoded string value.
+func (w *Encoder) StringRef(idx Index) {
+ w.Sync(SyncString)
+ w.Reloc(RelocString, idx)
+}
+
+// Strings encodes and writes a variable-length slice of strings into
+// the element bitstream.
+func (w *Encoder) Strings(ss []string) {
+ w.Len(len(ss))
+ for _, s := range ss {
+ w.String(s)
+ }
+}
+
+// Value encodes and writes a constant.Value into the element
+// bitstream.
+func (w *Encoder) Value(val constant.Value) {
+ w.Sync(SyncValue)
+ if w.Bool(val.Kind() == constant.Complex) {
+ w.scalar(constant.Real(val))
+ w.scalar(constant.Imag(val))
+ } else {
+ w.scalar(val)
+ }
+}
+
+func (w *Encoder) scalar(val constant.Value) {
+ switch v := constant.Val(val).(type) {
+ default:
+ errorf("unhandled %v (%v)", val, val.Kind())
+ case bool:
+ w.Code(ValBool)
+ w.Bool(v)
+ case string:
+ w.Code(ValString)
+ w.String(v)
+ case int64:
+ w.Code(ValInt64)
+ w.Int64(v)
+ case *big.Int:
+ w.Code(ValBigInt)
+ w.bigInt(v)
+ case *big.Rat:
+ w.Code(ValBigRat)
+ w.bigInt(v.Num())
+ w.bigInt(v.Denom())
+ case *big.Float:
+ w.Code(ValBigFloat)
+ w.bigFloat(v)
+ }
+}
+
+func (w *Encoder) bigInt(v *big.Int) {
+ b := v.Bytes()
+ w.String(string(b)) // TODO: More efficient encoding.
+ w.Bool(v.Sign() < 0)
+}
+
+func (w *Encoder) bigFloat(v *big.Float) {
+ b := v.Append(nil, 'p', -1)
+ w.String(string(b)) // TODO: More efficient encoding.
+}