summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:19:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:20:02 +0000
commit58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
parentReleasing debian version 1.43.2-1. (diff)
downloadnetdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h')
-rw-r--r--ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h431
1 files changed, 431 insertions, 0 deletions
diff --git a/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h b/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
new file mode 100644
index 000000000..e46b23fad
--- /dev/null
+++ b/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
@@ -0,0 +1,431 @@
+// Copyright (C) 2003 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_COMPRESS_STREAM_KERNEl_2_
+#define DLIB_COMPRESS_STREAM_KERNEl_2_
+
+#include "../algs.h"
+#include <iostream>
+#include <streambuf>
+#include "compress_stream_kernel_abstract.h"
+
+namespace dlib
+{
+
+ template <
+ typename fce,
+ typename fcd,
+ typename lz77_buffer,
+ typename sliding_buffer,
+ typename fce_length,
+ typename fcd_length,
+ typename fce_index,
+ typename fcd_index,
+ typename crc32
+ >
+ class compress_stream_kernel_2
+ {
+ /*!
+ REQUIREMENTS ON fce
+ is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
+ the alphabet_size of fce must be 257.
+ fce and fcd share the same kernel number.
+
+ REQUIREMENTS ON fcd
+ is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
+ the alphabet_size of fcd must be 257.
+ fce and fcd share the same kernel number.
+
+ REQUIREMENTS ON lz77_buffer
+ is an implementation of lz77_buffer/lz77_buffer_kernel_abstract.h
+
+ REQUIREMENTS ON sliding_buffer
+ is an implementation of sliding_buffer/sliding_buffer_kernel_abstract.h
+ is instantiated with T = unsigned char
+
+ REQUIREMENTS ON fce_length
+ is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
+ the alphabet_size of fce must be 513. This will be used to encode the length of lz77 matches.
+ fce_length and fcd share the same kernel number.
+
+ REQUIREMENTS ON fcd_length
+ is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
+ the alphabet_size of fcd must be 513. This will be used to decode the length of lz77 matches.
+ fce_length and fcd share the same kernel number.
+
+ REQUIREMENTS ON fce_index
+ is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
+ the alphabet_size of fce must be 32257. This will be used to encode the index of lz77 matches.
+ fce_index and fcd share the same kernel number.
+
+ REQUIREMENTS ON fcd_index
+ is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
+ the alphabet_size of fcd must be 32257. This will be used to decode the index of lz77 matches.
+ fce_index and fcd share the same kernel number.
+
+ REQUIREMENTS ON crc32
+ is an implementation of crc32/crc32_kernel_abstract.h
+
+ INITIAL VALUE
+ this object has no state
+
+ CONVENTION
+ this object has no state
+ !*/
+
+ const static unsigned long eof_symbol = 256;
+
+ public:
+
+ class decompression_error : public dlib::error
+ {
+ public:
+ decompression_error(
+ const char* i
+ ) :
+ dlib::error(std::string(i))
+ {}
+
+ decompression_error(
+ const std::string& i
+ ) :
+ dlib::error(i)
+ {}
+ };
+
+
+ compress_stream_kernel_2 (
+ )
+ {}
+
+ ~compress_stream_kernel_2 (
+ )
+ {}
+
+ void compress (
+ std::istream& in,
+ std::ostream& out
+ ) const;
+
+ void decompress (
+ std::istream& in,
+ std::ostream& out
+ ) const;
+
+ private:
+
+ // restricted functions
+ compress_stream_kernel_2(compress_stream_kernel_2&); // copy constructor
+ compress_stream_kernel_2& operator=(compress_stream_kernel_2&); // assignment operator
+
+ };
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+ // member function definitions
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename fce,
+ typename fcd,
+ typename lz77_buffer,
+ typename sliding_buffer,
+ typename fce_length,
+ typename fcd_length,
+ typename fce_index,
+ typename fcd_index,
+ typename crc32
+ >
+ void compress_stream_kernel_2<fce,fcd,lz77_buffer,sliding_buffer,fce_length,fcd_length,fce_index,fcd_index,crc32>::
+ compress (
+ std::istream& in_,
+ std::ostream& out_
+ ) const
+ {
+ std::streambuf::int_type temp;
+
+ std::streambuf& in = *in_.rdbuf();
+
+ typename fce::entropy_encoder_type coder;
+ coder.set_stream(out_);
+
+ fce model(coder);
+ fce_length model_length(coder);
+ fce_index model_index(coder);
+
+ const unsigned long LOOKAHEAD_LIMIT = 512;
+ lz77_buffer buffer(15,LOOKAHEAD_LIMIT);
+
+ crc32 crc;
+
+
+ unsigned long count = 0;
+
+ unsigned long lz77_count = 1; // number of times we used lz77 to encode
+ unsigned long ppm_count = 1; // number of times we used ppm to encode
+
+
+ while (true)
+ {
+ // write out a known value every 20000 symbols
+ if (count == 20000)
+ {
+ count = 0;
+ coder.encode(150,151,400);
+ }
+ ++count;
+
+ // try to fill the lookahead buffer
+ if (buffer.get_lookahead_buffer_size() < buffer.get_lookahead_buffer_limit())
+ {
+ temp = in.sbumpc();
+ while (temp != EOF)
+ {
+ crc.add(static_cast<unsigned char>(temp));
+ buffer.add(static_cast<unsigned char>(temp));
+ if (buffer.get_lookahead_buffer_size() == buffer.get_lookahead_buffer_limit())
+ break;
+ temp = in.sbumpc();
+ }
+ }
+
+ // compute the sum of ppm_count and lz77_count but make sure
+ // it is less than 65536
+ unsigned long sum = ppm_count + lz77_count;
+ if (sum >= 65536)
+ {
+ ppm_count >>= 1;
+ lz77_count >>= 1;
+ ppm_count |= 1;
+ lz77_count |= 1;
+ sum = ppm_count+lz77_count;
+ }
+
+ // if there are still more symbols in the lookahead buffer to encode
+ if (buffer.get_lookahead_buffer_size() > 0)
+ {
+ unsigned long match_index, match_length;
+ buffer.find_match(match_index,match_length,6);
+ if (match_length != 0)
+ {
+
+ // signal the decoder that we are using lz77
+ coder.encode(0,lz77_count,sum);
+ ++lz77_count;
+
+ // encode the index and length pair
+ model_index.encode(match_index);
+ model_length.encode(match_length);
+
+ }
+ else
+ {
+
+ // signal the decoder that we are using ppm
+ coder.encode(lz77_count,sum,sum);
+ ++ppm_count;
+
+ // encode the symbol using the ppm model
+ model.encode(buffer.lookahead_buffer(0));
+ buffer.shift_buffers(1);
+ }
+ }
+ else
+ {
+ // signal the decoder that we are using ppm
+ coder.encode(lz77_count,sum,sum);
+
+
+ model.encode(eof_symbol);
+ // now write the checksum
+ unsigned long checksum = crc.get_checksum();
+ unsigned char byte1 = static_cast<unsigned char>((checksum>>24)&0xFF);
+ unsigned char byte2 = static_cast<unsigned char>((checksum>>16)&0xFF);
+ unsigned char byte3 = static_cast<unsigned char>((checksum>>8)&0xFF);
+ unsigned char byte4 = static_cast<unsigned char>((checksum)&0xFF);
+
+ model.encode(byte1);
+ model.encode(byte2);
+ model.encode(byte3);
+ model.encode(byte4);
+
+ break;
+ }
+ } // while (true)
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename fce,
+ typename fcd,
+ typename lz77_buffer,
+ typename sliding_buffer,
+ typename fce_length,
+ typename fcd_length,
+ typename fce_index,
+ typename fcd_index,
+ typename crc32
+ >
+ void compress_stream_kernel_2<fce,fcd,lz77_buffer,sliding_buffer,fce_length,fcd_length,fce_index,fcd_index,crc32>::
+ decompress (
+ std::istream& in_,
+ std::ostream& out_
+ ) const
+ {
+
+ std::streambuf& out = *out_.rdbuf();
+
+ typename fcd::entropy_decoder_type coder;
+ coder.set_stream(in_);
+
+ fcd model(coder);
+ fcd_length model_length(coder);
+ fcd_index model_index(coder);
+
+ unsigned long symbol;
+ unsigned long count = 0;
+
+ sliding_buffer buffer;
+ buffer.set_size(15);
+
+ // Initialize the buffer to all zeros. There is no algorithmic reason to
+ // do this. But doing so avoids a warning from valgrind so that is why
+ // I'm doing this.
+ for (unsigned long i = 0; i < buffer.size(); ++i)
+ buffer[i] = 0;
+
+ crc32 crc;
+
+ unsigned long lz77_count = 1; // number of times we used lz77 to encode
+ unsigned long ppm_count = 1; // number of times we used ppm to encode
+ bool next_block_lz77;
+
+
+ // decode until we hit the marker symbol
+ while (true)
+ {
+ // make sure this is the value we expect
+ if (count == 20000)
+ {
+ if (coder.get_target(400) != 150)
+ {
+ throw decompression_error("Error detected in compressed data stream.");
+ }
+ count = 0;
+ coder.decode(150,151);
+ }
+ ++count;
+
+
+ // compute the sum of ppm_count and lz77_count but make sure
+ // it is less than 65536
+ unsigned long sum = ppm_count + lz77_count;
+ if (sum >= 65536)
+ {
+ ppm_count >>= 1;
+ lz77_count >>= 1;
+ ppm_count |= 1;
+ lz77_count |= 1;
+ sum = ppm_count+lz77_count;
+ }
+
+ // check if we are decoding a lz77 or ppm block
+ if (coder.get_target(sum) < lz77_count)
+ {
+ coder.decode(0,lz77_count);
+ next_block_lz77 = true;
+ ++lz77_count;
+ }
+ else
+ {
+ coder.decode(lz77_count,sum);
+ next_block_lz77 = false;
+ ++ppm_count;
+ }
+
+
+ if (next_block_lz77)
+ {
+
+ unsigned long match_length, match_index;
+ // decode the match index
+ model_index.decode(match_index);
+
+ // decode the match length
+ model_length.decode(match_length);
+
+
+ match_index += match_length;
+ buffer.rotate_left(match_length);
+ for (unsigned long i = 0; i < match_length; ++i)
+ {
+ unsigned char ch = buffer[match_index-i];
+ buffer[match_length-i-1] = ch;
+
+ crc.add(ch);
+ // write this ch to out
+ if (out.sputc(static_cast<char>(ch)) != static_cast<int>(ch))
+ {
+ throw std::ios::failure("error occurred in compress_stream_kernel_2::decompress");
+ }
+ }
+
+ }
+ else
+ {
+
+ // decode the next symbol
+ model.decode(symbol);
+ if (symbol != eof_symbol)
+ {
+ buffer.rotate_left(1);
+ buffer[0] = static_cast<unsigned char>(symbol);
+
+
+ crc.add(static_cast<unsigned char>(symbol));
+ // write this symbol to out
+ if (out.sputc(static_cast<char>(symbol)) != static_cast<int>(symbol))
+ {
+ throw std::ios::failure("error occurred in compress_stream_kernel_2::decompress");
+ }
+ }
+ else
+ {
+ // this was the eof marker symbol so we are done. now check the checksum
+
+ // now get the checksum and make sure it matches
+ unsigned char byte1;
+ unsigned char byte2;
+ unsigned char byte3;
+ unsigned char byte4;
+
+ model.decode(symbol); byte1 = static_cast<unsigned char>(symbol);
+ model.decode(symbol); byte2 = static_cast<unsigned char>(symbol);
+ model.decode(symbol); byte3 = static_cast<unsigned char>(symbol);
+ model.decode(symbol); byte4 = static_cast<unsigned char>(symbol);
+
+ unsigned long checksum = byte1;
+ checksum <<= 8;
+ checksum |= byte2;
+ checksum <<= 8;
+ checksum |= byte3;
+ checksum <<= 8;
+ checksum |= byte4;
+
+ if (checksum != crc.get_checksum())
+ throw decompression_error("Error detected in compressed data stream.");
+
+ break;
+ }
+ }
+
+ } // while (true)
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_COMPRESS_STREAM_KERNEl_2_
+