summaryrefslogtreecommitdiffstats
path: root/src/internal/zstd/literals.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/internal/zstd/literals.go')
-rw-r--r--src/internal/zstd/literals.go330
1 files changed, 330 insertions, 0 deletions
diff --git a/src/internal/zstd/literals.go b/src/internal/zstd/literals.go
new file mode 100644
index 0000000..b46d668
--- /dev/null
+++ b/src/internal/zstd/literals.go
@@ -0,0 +1,330 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "encoding/binary"
+)
+
+// readLiterals reads and decompresses the literals from data at off.
+// The literals are appended to outbuf, which is returned.
+// Also returns the new input offset. RFC 3.1.1.3.1.
+func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error) {
+ if off >= len(data) {
+ return 0, nil, r.makeEOFError(off)
+ }
+
+ // Literals section header. RFC 3.1.1.3.1.1.
+ hdr := data[off]
+ off++
+
+ if (hdr&3) == 0 || (hdr&3) == 1 {
+ return r.readRawRLELiterals(data, off, hdr, outbuf)
+ } else {
+ return r.readHuffLiterals(data, off, hdr, outbuf)
+ }
+}
+
+// readRawRLELiterals reads and decompresses a Raw_Literals_Block or
+// a RLE_Literals_Block. RFC 3.1.1.3.1.1.
+func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
+ raw := (hdr & 3) == 0
+
+ var regeneratedSize int
+ switch (hdr >> 2) & 3 {
+ case 0, 2:
+ regeneratedSize = int(hdr >> 3)
+ case 1:
+ if off >= len(data) {
+ return 0, nil, r.makeEOFError(off)
+ }
+ regeneratedSize = int(hdr>>4) + (int(data[off]) << 4)
+ off++
+ case 3:
+ if off+1 >= len(data) {
+ return 0, nil, r.makeEOFError(off)
+ }
+ regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + (int(data[off+1]) << 12)
+ off += 2
+ }
+
+ // We are going to use the entire literal block in the output.
+ // The maximum size of one decompressed block is 128K,
+ // so we can't have more literals than that.
+ if regeneratedSize > 128<<10 {
+ return 0, nil, r.makeError(off, "literal size too large")
+ }
+
+ if raw {
+ // RFC 3.1.1.3.1.2.
+ if off+regeneratedSize > len(data) {
+ return 0, nil, r.makeError(off, "raw literal size too large")
+ }
+ outbuf = append(outbuf, data[off:off+regeneratedSize]...)
+ off += regeneratedSize
+ } else {
+ // RFC 3.1.1.3.1.3.
+ if off >= len(data) {
+ return 0, nil, r.makeError(off, "RLE literal missing")
+ }
+ rle := data[off]
+ off++
+ for i := 0; i < regeneratedSize; i++ {
+ outbuf = append(outbuf, rle)
+ }
+ }
+
+ return off, outbuf, nil
+}
+
+// readHuffLiterals reads and decompresses a Compressed_Literals_Block or
+// a Treeless_Literals_Block. RFC 3.1.1.3.1.4.
+func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) {
+ var (
+ regeneratedSize int
+ compressedSize int
+ streams int
+ )
+ switch (hdr >> 2) & 3 {
+ case 0, 1:
+ if off+1 >= len(data) {
+ return 0, nil, r.makeEOFError(off)
+ }
+ regeneratedSize = (int(hdr) >> 4) | ((int(data[off]) & 0x3f) << 4)
+ compressedSize = (int(data[off]) >> 6) | (int(data[off+1]) << 2)
+ off += 2
+ if ((hdr >> 2) & 3) == 0 {
+ streams = 1
+ } else {
+ streams = 4
+ }
+ case 2:
+ if off+2 >= len(data) {
+ return 0, nil, r.makeEOFError(off)
+ }
+ regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 3) << 12)
+ compressedSize = (int(data[off+1]) >> 2) | (int(data[off+2]) << 6)
+ off += 3
+ streams = 4
+ case 3:
+ if off+3 >= len(data) {
+ return 0, nil, r.makeEOFError(off)
+ }
+ regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 0x3f) << 12)
+ compressedSize = (int(data[off+1]) >> 6) | (int(data[off+2]) << 2) | (int(data[off+3]) << 10)
+ off += 4
+ streams = 4
+ }
+
+ // We are going to use the entire literal block in the output.
+ // The maximum size of one decompressed block is 128K,
+ // so we can't have more literals than that.
+ if regeneratedSize > 128<<10 {
+ return 0, nil, r.makeError(off, "literal size too large")
+ }
+
+ roff := off + compressedSize
+ if roff > len(data) || roff < 0 {
+ return 0, nil, r.makeEOFError(off)
+ }
+
+ totalStreamsSize := compressedSize
+ if (hdr & 3) == 2 {
+ // Compressed_Literals_Block.
+ // Read new huffman tree.
+
+ if len(r.huffmanTable) < 1<<maxHuffmanBits {
+ r.huffmanTable = make([]uint16, 1<<maxHuffmanBits)
+ }
+
+ huffmanTableBits, hoff, err := r.readHuff(data, off, r.huffmanTable)
+ if err != nil {
+ return 0, nil, err
+ }
+ r.huffmanTableBits = huffmanTableBits
+
+ if totalStreamsSize < hoff-off {
+ return 0, nil, r.makeError(off, "Huffman table too big")
+ }
+ totalStreamsSize -= hoff - off
+ off = hoff
+ } else {
+ // Treeless_Literals_Block
+ // Reuse previous Huffman tree.
+ if r.huffmanTableBits == 0 {
+ return 0, nil, r.makeError(off, "missing literals Huffman tree")
+ }
+ }
+
+ // Decompress compressedSize bytes of data at off using the
+ // Huffman tree.
+
+ var err error
+ if streams == 1 {
+ outbuf, err = r.readLiteralsOneStream(data, off, totalStreamsSize, regeneratedSize, outbuf)
+ } else {
+ outbuf, err = r.readLiteralsFourStreams(data, off, totalStreamsSize, regeneratedSize, outbuf)
+ }
+
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return roff, outbuf, nil
+}
+
+// readLiteralsOneStream reads a single stream of compressed literals.
+func (r *Reader) readLiteralsOneStream(data block, off, compressedSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
+ // We let the reverse bit reader read earlier bytes,
+ // because the Huffman table ignores bits that it doesn't need.
+ rbr, err := r.makeReverseBitReader(data, off+compressedSize-1, off-2)
+ if err != nil {
+ return nil, err
+ }
+
+ huffTable := r.huffmanTable
+ huffBits := uint32(r.huffmanTableBits)
+ huffMask := (uint32(1) << huffBits) - 1
+
+ for i := 0; i < regeneratedSize; i++ {
+ if !rbr.fetch(uint8(huffBits)) {
+ return nil, rbr.makeError("literals Huffman stream out of bits")
+ }
+
+ var t uint16
+ idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
+ t = huffTable[idx]
+ outbuf = append(outbuf, byte(t>>8))
+ rbr.cnt -= uint32(t & 0xff)
+ }
+
+ return outbuf, nil
+}
+
+// readLiteralsFourStreams reads four interleaved streams of
+// compressed literals.
+func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, regeneratedSize int, outbuf []byte) ([]byte, error) {
+ // Read the jump table to find out where the streams are.
+ // RFC 3.1.1.3.1.6.
+ if off+5 >= len(data) {
+ return nil, r.makeEOFError(off)
+ }
+ if totalStreamsSize < 6 {
+ return nil, r.makeError(off, "total streams size too small for jump table")
+ }
+
+ streamSize1 := binary.LittleEndian.Uint16(data[off:])
+ streamSize2 := binary.LittleEndian.Uint16(data[off+2:])
+ streamSize3 := binary.LittleEndian.Uint16(data[off+4:])
+ off += 6
+
+ tot := uint64(streamSize1) + uint64(streamSize2) + uint64(streamSize3)
+ if tot > uint64(totalStreamsSize)-6 {
+ return nil, r.makeEOFError(off)
+ }
+ streamSize4 := uint32(totalStreamsSize) - 6 - uint32(tot)
+
+ off--
+ off1 := off + int(streamSize1)
+ start1 := off + 1
+
+ off2 := off1 + int(streamSize2)
+ start2 := off1 + 1
+
+ off3 := off2 + int(streamSize3)
+ start3 := off2 + 1
+
+ off4 := off3 + int(streamSize4)
+ start4 := off3 + 1
+
+ // We let the reverse bit readers read earlier bytes,
+ // because the Huffman tables ignore bits that they don't need.
+
+ rbr1, err := r.makeReverseBitReader(data, off1, start1-2)
+ if err != nil {
+ return nil, err
+ }
+
+ rbr2, err := r.makeReverseBitReader(data, off2, start2-2)
+ if err != nil {
+ return nil, err
+ }
+
+ rbr3, err := r.makeReverseBitReader(data, off3, start3-2)
+ if err != nil {
+ return nil, err
+ }
+
+ rbr4, err := r.makeReverseBitReader(data, off4, start4-2)
+ if err != nil {
+ return nil, err
+ }
+
+ regeneratedStreamSize := (regeneratedSize + 3) / 4
+
+ out1 := len(outbuf)
+ out2 := out1 + regeneratedStreamSize
+ out3 := out2 + regeneratedStreamSize
+ out4 := out3 + regeneratedStreamSize
+
+ regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3
+
+ outbuf = append(outbuf, make([]byte, regeneratedSize)...)
+
+ huffTable := r.huffmanTable
+ huffBits := uint32(r.huffmanTableBits)
+ huffMask := (uint32(1) << huffBits) - 1
+
+ for i := 0; i < regeneratedStreamSize; i++ {
+ use4 := i < regeneratedStreamSize4
+
+ fetchHuff := func(rbr *reverseBitReader) (uint16, error) {
+ if !rbr.fetch(uint8(huffBits)) {
+ return 0, rbr.makeError("literals Huffman stream out of bits")
+ }
+ idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask
+ return huffTable[idx], nil
+ }
+
+ t1, err := fetchHuff(&rbr1)
+ if err != nil {
+ return nil, err
+ }
+
+ t2, err := fetchHuff(&rbr2)
+ if err != nil {
+ return nil, err
+ }
+
+ t3, err := fetchHuff(&rbr3)
+ if err != nil {
+ return nil, err
+ }
+
+ if use4 {
+ t4, err := fetchHuff(&rbr4)
+ if err != nil {
+ return nil, err
+ }
+ outbuf[out4] = byte(t4 >> 8)
+ out4++
+ rbr4.cnt -= uint32(t4 & 0xff)
+ }
+
+ outbuf[out1] = byte(t1 >> 8)
+ out1++
+ rbr1.cnt -= uint32(t1 & 0xff)
+
+ outbuf[out2] = byte(t2 >> 8)
+ out2++
+ rbr2.cnt -= uint32(t2 & 0xff)
+
+ outbuf[out3] = byte(t3 >> 8)
+ out3++
+ rbr3.cnt -= uint32(t3 & 0xff)
+ }
+
+ return outbuf, nil
+}