diff options
Diffstat (limited to 'src/internal/zstd/literals.go')
-rw-r--r-- | src/internal/zstd/literals.go | 330 |
1 files changed, 330 insertions, 0 deletions
diff --git a/src/internal/zstd/literals.go b/src/internal/zstd/literals.go new file mode 100644 index 0000000..b46d668 --- /dev/null +++ b/src/internal/zstd/literals.go @@ -0,0 +1,330 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "encoding/binary" +) + +// readLiterals reads and decompresses the literals from data at off. +// The literals are appended to outbuf, which is returned. +// Also returns the new input offset. RFC 3.1.1.3.1. +func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error) { + if off >= len(data) { + return 0, nil, r.makeEOFError(off) + } + + // Literals section header. RFC 3.1.1.3.1.1. + hdr := data[off] + off++ + + if (hdr&3) == 0 || (hdr&3) == 1 { + return r.readRawRLELiterals(data, off, hdr, outbuf) + } else { + return r.readHuffLiterals(data, off, hdr, outbuf) + } +} + +// readRawRLELiterals reads and decompresses a Raw_Literals_Block or +// a RLE_Literals_Block. RFC 3.1.1.3.1.1. +func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) { + raw := (hdr & 3) == 0 + + var regeneratedSize int + switch (hdr >> 2) & 3 { + case 0, 2: + regeneratedSize = int(hdr >> 3) + case 1: + if off >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + off++ + case 3: + if off+1 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + (int(data[off+1]) << 12) + off += 2 + } + + // We are going to use the entire literal block in the output. + // The maximum size of one decompressed block is 128K, + // so we can't have more literals than that. + if regeneratedSize > 128<<10 { + return 0, nil, r.makeError(off, "literal size too large") + } + + if raw { + // RFC 3.1.1.3.1.2. + if off+regeneratedSize > len(data) { + return 0, nil, r.makeError(off, "raw literal size too large") + } + outbuf = append(outbuf, data[off:off+regeneratedSize]...) + off += regeneratedSize + } else { + // RFC 3.1.1.3.1.3. + if off >= len(data) { + return 0, nil, r.makeError(off, "RLE literal missing") + } + rle := data[off] + off++ + for i := 0; i < regeneratedSize; i++ { + outbuf = append(outbuf, rle) + } + } + + return off, outbuf, nil +} + +// readHuffLiterals reads and decompresses a Compressed_Literals_Block or +// a Treeless_Literals_Block. RFC 3.1.1.3.1.4. +func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) { + var ( + regeneratedSize int + compressedSize int + streams int + ) + switch (hdr >> 2) & 3 { + case 0, 1: + if off+1 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = (int(hdr) >> 4) | ((int(data[off]) & 0x3f) << 4) + compressedSize = (int(data[off]) >> 6) | (int(data[off+1]) << 2) + off += 2 + if ((hdr >> 2) & 3) == 0 { + streams = 1 + } else { + streams = 4 + } + case 2: + if off+2 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 3) << 12) + compressedSize = (int(data[off+1]) >> 2) | (int(data[off+2]) << 6) + off += 3 + streams = 4 + case 3: + if off+3 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 0x3f) << 12) + compressedSize = (int(data[off+1]) >> 6) | (int(data[off+2]) << 2) | (int(data[off+3]) << 10) + off += 4 + streams = 4 + } + + // We are going to use the entire literal block in the output. + // The maximum size of one decompressed block is 128K, + // so we can't have more literals than that. + if regeneratedSize > 128<<10 { + return 0, nil, r.makeError(off, "literal size too large") + } + + roff := off + compressedSize + if roff > len(data) || roff < 0 { + return 0, nil, r.makeEOFError(off) + } + + totalStreamsSize := compressedSize + if (hdr & 3) == 2 { + // Compressed_Literals_Block. + // Read new huffman tree. + + if len(r.huffmanTable) < 1<<maxHuffmanBits { + r.huffmanTable = make([]uint16, 1<<maxHuffmanBits) + } + + huffmanTableBits, hoff, err := r.readHuff(data, off, r.huffmanTable) + if err != nil { + return 0, nil, err + } + r.huffmanTableBits = huffmanTableBits + + if totalStreamsSize < hoff-off { + return 0, nil, r.makeError(off, "Huffman table too big") + } + totalStreamsSize -= hoff - off + off = hoff + } else { + // Treeless_Literals_Block + // Reuse previous Huffman tree. + if r.huffmanTableBits == 0 { + return 0, nil, r.makeError(off, "missing literals Huffman tree") + } + } + + // Decompress compressedSize bytes of data at off using the + // Huffman tree. + + var err error + if streams == 1 { + outbuf, err = r.readLiteralsOneStream(data, off, totalStreamsSize, regeneratedSize, outbuf) + } else { + outbuf, err = r.readLiteralsFourStreams(data, off, totalStreamsSize, regeneratedSize, outbuf) + } + + if err != nil { + return 0, nil, err + } + + return roff, outbuf, nil +} + +// readLiteralsOneStream reads a single stream of compressed literals. +func (r *Reader) readLiteralsOneStream(data block, off, compressedSize, regeneratedSize int, outbuf []byte) ([]byte, error) { + // We let the reverse bit reader read earlier bytes, + // because the Huffman table ignores bits that it doesn't need. + rbr, err := r.makeReverseBitReader(data, off+compressedSize-1, off-2) + if err != nil { + return nil, err + } + + huffTable := r.huffmanTable + huffBits := uint32(r.huffmanTableBits) + huffMask := (uint32(1) << huffBits) - 1 + + for i := 0; i < regeneratedSize; i++ { + if !rbr.fetch(uint8(huffBits)) { + return nil, rbr.makeError("literals Huffman stream out of bits") + } + + var t uint16 + idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask + t = huffTable[idx] + outbuf = append(outbuf, byte(t>>8)) + rbr.cnt -= uint32(t & 0xff) + } + + return outbuf, nil +} + +// readLiteralsFourStreams reads four interleaved streams of +// compressed literals. +func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, regeneratedSize int, outbuf []byte) ([]byte, error) { + // Read the jump table to find out where the streams are. + // RFC 3.1.1.3.1.6. + if off+5 >= len(data) { + return nil, r.makeEOFError(off) + } + if totalStreamsSize < 6 { + return nil, r.makeError(off, "total streams size too small for jump table") + } + + streamSize1 := binary.LittleEndian.Uint16(data[off:]) + streamSize2 := binary.LittleEndian.Uint16(data[off+2:]) + streamSize3 := binary.LittleEndian.Uint16(data[off+4:]) + off += 6 + + tot := uint64(streamSize1) + uint64(streamSize2) + uint64(streamSize3) + if tot > uint64(totalStreamsSize)-6 { + return nil, r.makeEOFError(off) + } + streamSize4 := uint32(totalStreamsSize) - 6 - uint32(tot) + + off-- + off1 := off + int(streamSize1) + start1 := off + 1 + + off2 := off1 + int(streamSize2) + start2 := off1 + 1 + + off3 := off2 + int(streamSize3) + start3 := off2 + 1 + + off4 := off3 + int(streamSize4) + start4 := off3 + 1 + + // We let the reverse bit readers read earlier bytes, + // because the Huffman tables ignore bits that they don't need. + + rbr1, err := r.makeReverseBitReader(data, off1, start1-2) + if err != nil { + return nil, err + } + + rbr2, err := r.makeReverseBitReader(data, off2, start2-2) + if err != nil { + return nil, err + } + + rbr3, err := r.makeReverseBitReader(data, off3, start3-2) + if err != nil { + return nil, err + } + + rbr4, err := r.makeReverseBitReader(data, off4, start4-2) + if err != nil { + return nil, err + } + + regeneratedStreamSize := (regeneratedSize + 3) / 4 + + out1 := len(outbuf) + out2 := out1 + regeneratedStreamSize + out3 := out2 + regeneratedStreamSize + out4 := out3 + regeneratedStreamSize + + regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3 + + outbuf = append(outbuf, make([]byte, regeneratedSize)...) + + huffTable := r.huffmanTable + huffBits := uint32(r.huffmanTableBits) + huffMask := (uint32(1) << huffBits) - 1 + + for i := 0; i < regeneratedStreamSize; i++ { + use4 := i < regeneratedStreamSize4 + + fetchHuff := func(rbr *reverseBitReader) (uint16, error) { + if !rbr.fetch(uint8(huffBits)) { + return 0, rbr.makeError("literals Huffman stream out of bits") + } + idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask + return huffTable[idx], nil + } + + t1, err := fetchHuff(&rbr1) + if err != nil { + return nil, err + } + + t2, err := fetchHuff(&rbr2) + if err != nil { + return nil, err + } + + t3, err := fetchHuff(&rbr3) + if err != nil { + return nil, err + } + + if use4 { + t4, err := fetchHuff(&rbr4) + if err != nil { + return nil, err + } + outbuf[out4] = byte(t4 >> 8) + out4++ + rbr4.cnt -= uint32(t4 & 0xff) + } + + outbuf[out1] = byte(t1 >> 8) + out1++ + rbr1.cnt -= uint32(t1 & 0xff) + + outbuf[out2] = byte(t2 >> 8) + out2++ + rbr2.cnt -= uint32(t2 & 0xff) + + outbuf[out3] = byte(t3 >> 8) + out3++ + rbr3.cnt -= uint32(t3 & 0xff) + } + + return outbuf, nil +} |