diff options
Diffstat (limited to 'src/compress/lzw/writer.go')
-rw-r--r-- | src/compress/lzw/writer.go | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/src/compress/lzw/writer.go b/src/compress/lzw/writer.go new file mode 100644 index 0000000..cf06ea8 --- /dev/null +++ b/src/compress/lzw/writer.go @@ -0,0 +1,293 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lzw + +import ( + "bufio" + "errors" + "fmt" + "io" +) + +// A writer is a buffered, flushable writer. +type writer interface { + io.ByteWriter + Flush() error +} + +const ( + // A code is a 12 bit value, stored as a uint32 when encoding to avoid + // type conversions when shifting bits. + maxCode = 1<<12 - 1 + invalidCode = 1<<32 - 1 + // There are 1<<12 possible codes, which is an upper bound on the number of + // valid hash table entries at any given point in time. tableSize is 4x that. + tableSize = 4 * 1 << 12 + tableMask = tableSize - 1 + // A hash table entry is a uint32. Zero is an invalid entry since the + // lower 12 bits of a valid entry must be a non-literal code. + invalidEntry = 0 +) + +// Writer is an LZW compressor. It writes the compressed form of the data +// to an underlying writer (see NewWriter). +type Writer struct { + // w is the writer that compressed bytes are written to. + w writer + // order, write, bits, nBits and width are the state for + // converting a code stream into a byte stream. + order Order + write func(*Writer, uint32) error + bits uint32 + nBits uint + width uint + // litWidth is the width in bits of literal codes. + litWidth uint + // hi is the code implied by the next code emission. + // overflow is the code at which hi overflows the code width. + hi, overflow uint32 + // savedCode is the accumulated code at the end of the most recent Write + // call. It is equal to invalidCode if there was no such call. + savedCode uint32 + // err is the first error encountered during writing. Closing the writer + // will make any future Write calls return errClosed + err error + // table is the hash table from 20-bit keys to 12-bit values. Each table + // entry contains key<<12|val and collisions resolve by linear probing. + // The keys consist of a 12-bit code prefix and an 8-bit byte suffix. + // The values are a 12-bit code. + table [tableSize]uint32 +} + +// writeLSB writes the code c for "Least Significant Bits first" data. +func (w *Writer) writeLSB(c uint32) error { + w.bits |= c << w.nBits + w.nBits += w.width + for w.nBits >= 8 { + if err := w.w.WriteByte(uint8(w.bits)); err != nil { + return err + } + w.bits >>= 8 + w.nBits -= 8 + } + return nil +} + +// writeMSB writes the code c for "Most Significant Bits first" data. +func (w *Writer) writeMSB(c uint32) error { + w.bits |= c << (32 - w.width - w.nBits) + w.nBits += w.width + for w.nBits >= 8 { + if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil { + return err + } + w.bits <<= 8 + w.nBits -= 8 + } + return nil +} + +// errOutOfCodes is an internal error that means that the writer has run out +// of unused codes and a clear code needs to be sent next. +var errOutOfCodes = errors.New("lzw: out of codes") + +// incHi increments e.hi and checks for both overflow and running out of +// unused codes. In the latter case, incHi sends a clear code, resets the +// writer state and returns errOutOfCodes. +func (w *Writer) incHi() error { + w.hi++ + if w.hi == w.overflow { + w.width++ + w.overflow <<= 1 + } + if w.hi == maxCode { + clear := uint32(1) << w.litWidth + if err := w.write(w, clear); err != nil { + return err + } + w.width = w.litWidth + 1 + w.hi = clear + 1 + w.overflow = clear << 1 + for i := range w.table { + w.table[i] = invalidEntry + } + return errOutOfCodes + } + return nil +} + +// Write writes a compressed representation of p to w's underlying writer. +func (w *Writer) Write(p []byte) (n int, err error) { + if w.err != nil { + return 0, w.err + } + if len(p) == 0 { + return 0, nil + } + if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff { + for _, x := range p { + if x > maxLit { + w.err = errors.New("lzw: input byte too large for the litWidth") + return 0, w.err + } + } + } + n = len(p) + code := w.savedCode + if code == invalidCode { + // This is the first write; send a clear code. + // https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F + // "Variable-Length-Code LZW Compression" says that "Encoders should + // output a Clear code as the first code of each image data stream". + // + // LZW compression isn't only used by GIF, but it's cheap to follow + // that directive unconditionally. + clear := uint32(1) << w.litWidth + if err := w.write(w, clear); err != nil { + return 0, err + } + // After the starting clear code, the next code sent (for non-empty + // input) is always a literal code. + code, p = uint32(p[0]), p[1:] + } +loop: + for _, x := range p { + literal := uint32(x) + key := code<<8 | literal + // If there is a hash table hit for this key then we continue the loop + // and do not emit a code yet. + hash := (key>>12 ^ key) & tableMask + for h, t := hash, w.table[hash]; t != invalidEntry; { + if key == t>>12 { + code = t & maxCode + continue loop + } + h = (h + 1) & tableMask + t = w.table[h] + } + // Otherwise, write the current code, and literal becomes the start of + // the next emitted code. + if w.err = w.write(w, code); w.err != nil { + return 0, w.err + } + code = literal + // Increment e.hi, the next implied code. If we run out of codes, reset + // the writer state (including clearing the hash table) and continue. + if err1 := w.incHi(); err1 != nil { + if err1 == errOutOfCodes { + continue + } + w.err = err1 + return 0, w.err + } + // Otherwise, insert key -> e.hi into the map that e.table represents. + for { + if w.table[hash] == invalidEntry { + w.table[hash] = (key << 12) | w.hi + break + } + hash = (hash + 1) & tableMask + } + } + w.savedCode = code + return n, nil +} + +// Close closes the Writer, flushing any pending output. It does not close +// w's underlying writer. +func (w *Writer) Close() error { + if w.err != nil { + if w.err == errClosed { + return nil + } + return w.err + } + // Make any future calls to Write return errClosed. + w.err = errClosed + // Write the savedCode if valid. + if w.savedCode != invalidCode { + if err := w.write(w, w.savedCode); err != nil { + return err + } + if err := w.incHi(); err != nil && err != errOutOfCodes { + return err + } + } else { + // Write the starting clear code, as w.Write did not. + clear := uint32(1) << w.litWidth + if err := w.write(w, clear); err != nil { + return err + } + } + // Write the eof code. + eof := uint32(1)<<w.litWidth + 1 + if err := w.write(w, eof); err != nil { + return err + } + // Write the final bits. + if w.nBits > 0 { + if w.order == MSB { + w.bits >>= 24 + } + if err := w.w.WriteByte(uint8(w.bits)); err != nil { + return err + } + } + return w.w.Flush() +} + +// Reset clears the Writer's state and allows it to be reused again +// as a new Writer. +func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) { + *w = Writer{} + w.init(dst, order, litWidth) +} + +// NewWriter creates a new io.WriteCloser. +// Writes to the returned io.WriteCloser are compressed and written to w. +// It is the caller's responsibility to call Close on the WriteCloser when +// finished writing. +// The number of bits to use for literal codes, litWidth, must be in the +// range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth. +// +// It is guaranteed that the underlying type of the returned io.WriteCloser +// is a *Writer. +func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser { + return newWriter(w, order, litWidth) +} + +func newWriter(dst io.Writer, order Order, litWidth int) *Writer { + w := new(Writer) + w.init(dst, order, litWidth) + return w +} + +func (w *Writer) init(dst io.Writer, order Order, litWidth int) { + switch order { + case LSB: + w.write = (*Writer).writeLSB + case MSB: + w.write = (*Writer).writeMSB + default: + w.err = errors.New("lzw: unknown order") + return + } + if litWidth < 2 || 8 < litWidth { + w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth) + return + } + bw, ok := dst.(writer) + if !ok && dst != nil { + bw = bufio.NewWriter(dst) + } + w.w = bw + lw := uint(litWidth) + w.order = order + w.width = 1 + lw + w.litWidth = lw + w.hi = 1<<lw + 1 + w.overflow = 1 << (lw + 1) + w.savedCode = invalidCode +} |