summaryrefslogtreecommitdiffstats
path: root/src/internal/zstd/huff.go
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:19:13 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-16 19:19:13 +0000
commitccd992355df7192993c666236047820244914598 (patch)
treef00fea65147227b7743083c6148396f74cd66935 /src/internal/zstd/huff.go
parentInitial commit. (diff)
downloadgolang-1.21-ccd992355df7192993c666236047820244914598.tar.xz
golang-1.21-ccd992355df7192993c666236047820244914598.zip
Adding upstream version 1.21.8.upstream/1.21.8
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/internal/zstd/huff.go')
-rw-r--r--src/internal/zstd/huff.go204
1 files changed, 204 insertions, 0 deletions
diff --git a/src/internal/zstd/huff.go b/src/internal/zstd/huff.go
new file mode 100644
index 0000000..452e24b
--- /dev/null
+++ b/src/internal/zstd/huff.go
@@ -0,0 +1,204 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "io"
+ "math/bits"
+)
+
+// maxHuffmanBits is the largest possible Huffman table bits.
+const maxHuffmanBits = 11
+
+// readHuff reads Huffman table from data starting at off into table.
+// Each entry in a Huffman table is a pair of bytes.
+// The high byte is the encoded value. The low byte is the number
+// of bits used to encode that value. We index into the table
+// with a value of size tableBits. A value that requires fewer bits
+// appear in the table multiple times.
+// This returns the number of bits in the Huffman table and the new offset.
+// RFC 4.2.1.
+func (r *Reader) readHuff(data block, off int, table []uint16) (tableBits, roff int, err error) {
+ if off >= len(data) {
+ return 0, 0, r.makeEOFError(off)
+ }
+
+ hdr := data[off]
+ off++
+
+ var weights [256]uint8
+ var count int
+ if hdr < 128 {
+ // The table is compressed using an FSE. RFC 4.2.1.2.
+ if len(r.fseScratch) < 1<<6 {
+ r.fseScratch = make([]fseEntry, 1<<6)
+ }
+ fseBits, noff, err := r.readFSE(data, off, 255, 6, r.fseScratch)
+ if err != nil {
+ return 0, 0, err
+ }
+ fseTable := r.fseScratch
+
+ if off+int(hdr) > len(data) {
+ return 0, 0, r.makeEOFError(off)
+ }
+
+ rbr, err := r.makeReverseBitReader(data, off+int(hdr)-1, noff)
+ if err != nil {
+ return 0, 0, err
+ }
+
+ state1, err := rbr.val(uint8(fseBits))
+ if err != nil {
+ return 0, 0, err
+ }
+
+ state2, err := rbr.val(uint8(fseBits))
+ if err != nil {
+ return 0, 0, err
+ }
+
+ // There are two independent FSE streams, tracked by
+ // state1 and state2. We decode them alternately.
+
+ for {
+ pt := &fseTable[state1]
+ if !rbr.fetch(pt.bits) {
+ if count >= 254 {
+ return 0, 0, rbr.makeError("Huffman count overflow")
+ }
+ weights[count] = pt.sym
+ weights[count+1] = fseTable[state2].sym
+ count += 2
+ break
+ }
+
+ v, err := rbr.val(pt.bits)
+ if err != nil {
+ return 0, 0, err
+ }
+ state1 = uint32(pt.base) + v
+
+ if count >= 255 {
+ return 0, 0, rbr.makeError("Huffman count overflow")
+ }
+
+ weights[count] = pt.sym
+ count++
+
+ pt = &fseTable[state2]
+
+ if !rbr.fetch(pt.bits) {
+ if count >= 254 {
+ return 0, 0, rbr.makeError("Huffman count overflow")
+ }
+ weights[count] = pt.sym
+ weights[count+1] = fseTable[state1].sym
+ count += 2
+ break
+ }
+
+ v, err = rbr.val(pt.bits)
+ if err != nil {
+ return 0, 0, err
+ }
+ state2 = uint32(pt.base) + v
+
+ if count >= 255 {
+ return 0, 0, rbr.makeError("Huffman count overflow")
+ }
+
+ weights[count] = pt.sym
+ count++
+ }
+
+ off += int(hdr)
+ } else {
+ // The table is not compressed. Each weight is 4 bits.
+
+ count = int(hdr) - 127
+ if off+((count+1)/2) >= len(data) {
+ return 0, 0, io.ErrUnexpectedEOF
+ }
+ for i := 0; i < count; i += 2 {
+ b := data[off]
+ off++
+ weights[i] = b >> 4
+ weights[i+1] = b & 0xf
+ }
+ }
+
+ // RFC 4.2.1.3.
+
+ var weightMark [13]uint32
+ weightMask := uint32(0)
+ for _, w := range weights[:count] {
+ if w > 12 {
+ return 0, 0, r.makeError(off, "Huffman weight overflow")
+ }
+ weightMark[w]++
+ if w > 0 {
+ weightMask += 1 << (w - 1)
+ }
+ }
+ if weightMask == 0 {
+ return 0, 0, r.makeError(off, "bad Huffman weights")
+ }
+
+ tableBits = 32 - bits.LeadingZeros32(weightMask)
+ if tableBits > maxHuffmanBits {
+ return 0, 0, r.makeError(off, "bad Huffman weights")
+ }
+
+ if len(table) < 1<<tableBits {
+ return 0, 0, r.makeError(off, "Huffman table too small")
+ }
+
+ // Work out the last weight value, which is omitted because
+ // the weights must sum to a power of two.
+ left := (uint32(1) << tableBits) - weightMask
+ if left == 0 {
+ return 0, 0, r.makeError(off, "bad Huffman weights")
+ }
+ highBit := 31 - bits.LeadingZeros32(left)
+ if uint32(1)<<highBit != left {
+ return 0, 0, r.makeError(off, "bad Huffman weights")
+ }
+ if count >= 256 {
+ return 0, 0, r.makeError(off, "Huffman weight overflow")
+ }
+ weights[count] = uint8(highBit + 1)
+ count++
+ weightMark[highBit+1]++
+
+ if weightMark[1] < 2 || weightMark[1]&1 != 0 {
+ return 0, 0, r.makeError(off, "bad Huffman weights")
+ }
+
+ // Change weightMark from a count of weights to the index of
+ // the first symbol for that weight. We shift the indexes to
+ // also store how many we have seen so far,
+ next := uint32(0)
+ for i := 0; i < tableBits; i++ {
+ cur := next
+ next += weightMark[i+1] << i
+ weightMark[i+1] = cur
+ }
+
+ for i, w := range weights[:count] {
+ if w == 0 {
+ continue
+ }
+ length := uint32(1) << (w - 1)
+ tval := uint16(i)<<8 | (uint16(tableBits) + 1 - uint16(w))
+ start := weightMark[w]
+ for j := uint32(0); j < length; j++ {
+ table[start+j] = tval
+ }
+ weightMark[w] += length
+ }
+
+ return tableBits, off, nil
+}