Merging upstream version 1.44.3.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-03-09 13:19:48 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-03-09 13:20:02 +0000
commit: 58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree: 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
parent: Releasing debian version 1.43.2-1. (diff)
download: netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
1 files changed, 431 insertions, 0 deletions
diff --git a/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h b/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
new file mode 100644
index 000000000..e46b23fad
--- /dev/null
+++ b/ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
@@ -0,0 +1,431 @@
+// Copyright (C) 2003  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_COMPRESS_STREAM_KERNEl_2_
+#define DLIB_COMPRESS_STREAM_KERNEl_2_
+
+#include "../algs.h"
+#include <iostream>
+#include <streambuf>
+#include "compress_stream_kernel_abstract.h"
+
+namespace dlib
+{
+
+    template <
+        typename fce,
+        typename fcd,
+        typename lz77_buffer,
+        typename sliding_buffer,
+        typename fce_length,
+        typename fcd_length,
+        typename fce_index,
+        typename fcd_index,
+        typename crc32
+        >
+    class compress_stream_kernel_2
+    {
+        /*!
+            REQUIREMENTS ON fce
+                is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
+                the alphabet_size of fce must be 257.
+                fce and fcd share the same kernel number.
+
+            REQUIREMENTS ON fcd
+                is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
+                the alphabet_size of fcd must be 257.
+                fce and fcd share the same kernel number.
+
+            REQUIREMENTS ON lz77_buffer
+                is an implementation of lz77_buffer/lz77_buffer_kernel_abstract.h
+
+            REQUIREMENTS ON sliding_buffer
+                is an implementation of sliding_buffer/sliding_buffer_kernel_abstract.h
+                is instantiated with T = unsigned char
+
+            REQUIREMENTS ON fce_length
+                is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
+                the alphabet_size of fce_length must be 513.  This will be used to encode the length of lz77 matches.
+                fce_length and fcd_length share the same kernel number.
+
+            REQUIREMENTS ON fcd_length
+                is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
+                the alphabet_size of fcd_length must be 513.  This will be used to decode the length of lz77 matches.
+                fce_length and fcd_length share the same kernel number.
+
+            REQUIREMENTS ON fce_index
+                is an implementation of entropy_encoder_model/entropy_encoder_model_kernel_abstract.h
+                the alphabet_size of fce_index must be 32257.  This will be used to encode the index of lz77 matches.
+                fce_index and fcd_index share the same kernel number.
+
+            REQUIREMENTS ON fcd_index
+                is an implementation of entropy_decoder_model/entropy_decoder_model_kernel_abstract.h
+                the alphabet_size of fcd_index must be 32257.  This will be used to decode the index of lz77 matches.
+                fce_index and fcd_index share the same kernel number.
+
+            REQUIREMENTS ON crc32
+                is an implementation of crc32/crc32_kernel_abstract.h
+
+            INITIAL VALUE
+                this object has no state
+
+            CONVENTION
+                this object has no state
+        !*/
+
+        const static unsigned long eof_symbol = 256;  // end-of-stream marker coded through the fce/fcd model; one past byte value 255
+
+    public:
+
+        class decompression_error : public dlib::error  // thrown by decompress() when the known-value marker or the checksum does not match
+        { 
+            public: 
+                decompression_error(
+                    const char* i
+                ) :
+                    dlib::error(std::string(i))
+                {}
+
+                decompression_error(
+                    const std::string& i
+                ) :
+                    dlib::error(i)
+                {}
+        };
+
+
+        compress_stream_kernel_2 (
+        )
+        {}
+
+        ~compress_stream_kernel_2 (
+        )
+        {}
+
+        void compress (  // reads in until its stream buffer reports end of input and writes the encoded form to out
+            std::istream& in,
+            std::ostream& out
+        ) const;
+
+        void decompress (  // decodes in and writes the bytes to out; throws decompression_error or std::ios::failure on error
+            std::istream& in,
+            std::ostream& out
+        ) const;
+
+    private:
+
+        // restricted functions: declared private and given no definition in this header
+        compress_stream_kernel_2(compress_stream_kernel_2&);        // copy constructor
+        compress_stream_kernel_2& operator=(compress_stream_kernel_2&);    // assignment operator
+
+    };
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+    // member function definitions
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename fce,
+        typename fcd,
+        typename lz77_buffer,
+        typename sliding_buffer,
+        typename fce_length,
+        typename fcd_length,
+        typename fce_index,
+        typename fcd_index,
+        typename crc32
+        >
+    void compress_stream_kernel_2<fce,fcd,lz77_buffer,sliding_buffer,fce_length,fcd_length,fce_index,fcd_index,crc32>::
+    compress (
+        std::istream& in_,
+        std::ostream& out_
+    ) const
+    {
+        std::streambuf::int_type temp;  // most recent result of in.sbumpc()
+
+        std::streambuf& in = *in_.rdbuf();  // input is pulled directly from the stream buffer
+
+        typename fce::entropy_encoder_type coder;
+        coder.set_stream(out_);
+
+        fce model(coder);                // codes literal bytes, the eof symbol and the checksum bytes
+        fce_length model_length(coder);  // codes lz77 match lengths
+        fce_index model_index(coder);    // codes lz77 match indexes
+
+        const unsigned long LOOKAHEAD_LIMIT = 512;  // passed as the second lz77_buffer constructor argument
+        lz77_buffer buffer(15,LOOKAHEAD_LIMIT);
+        
+        crc32 crc;  // accumulates a checksum over every input byte read below
+      
+
+        unsigned long count = 0;  // loop iterations since the last known-value marker was written
+
+        unsigned long lz77_count = 1;  // adaptive frequency of lz77 blocks; starts at 1 and is kept nonzero
+        unsigned long ppm_count = 1;   // adaptive frequency of ppm blocks; starts at 1 and is kept nonzero
+
+
+        while (true)
+        {
+            // every 20000 iterations write a known value (range [150,151) of 400) that decompress() checks for
+            if (count == 20000)
+            {
+                count = 0;
+                coder.encode(150,151,400);
+            }
+            ++count;
+
+            // try to fill the lookahead buffer; every byte read is also added to the crc
+            if (buffer.get_lookahead_buffer_size() < buffer.get_lookahead_buffer_limit())
+            {
+                temp = in.sbumpc();
+                while (temp != EOF)  // NOTE(review): uses the C EOF macro, not std::streambuf::traits_type::eof()
+                {
+                    crc.add(static_cast<unsigned char>(temp));
+                    buffer.add(static_cast<unsigned char>(temp));
+                    if (buffer.get_lookahead_buffer_size() == buffer.get_lookahead_buffer_limit())
+                        break;
+                    temp = in.sbumpc();
+                }
+            }
+
+            // compute the sum of ppm_count and lz77_count but make sure it is less than 65536:
+            // on overflow both counts are halved and forced odd (so nonzero); decompress() rescales identically
+            unsigned long sum = ppm_count + lz77_count;
+            if (sum >= 65536)
+            {
+                ppm_count >>= 1;                    
+                lz77_count >>= 1;
+                ppm_count |= 1;
+                lz77_count |= 1;
+                sum = ppm_count+lz77_count;                    
+            }
+
+            // if there are still more symbols in the lookahead buffer to encode
+            if (buffer.get_lookahead_buffer_size() > 0)  
+            {
+                unsigned long match_index, match_length;
+                buffer.find_match(match_index,match_length,6);  // NOTE(review): 6 is presumably the minimum match length - confirm against lz77_buffer
+                if (match_length != 0)  // a nonzero length selects the lz77 path
+                {
+                  
+                    // signal the decoder that we are using lz77 (sub-range [0,lz77_count) of sum)
+                    coder.encode(0,lz77_count,sum);
+                    ++lz77_count;
+                    
+                    // encode the index and length pair (decompress() reads them back in this order)
+                    model_index.encode(match_index);                   
+                    model_length.encode(match_length);                   
+                    // NOTE(review): no shift_buffers() here - presumably find_match() already advanced the buffers; confirm
+                }
+                else
+                {
+
+                    // signal the decoder that we are using ppm (sub-range [lz77_count,sum) of sum)
+                    coder.encode(lz77_count,sum,sum);
+                    ++ppm_count;
+
+                    // encode the next lookahead byte as a literal using the ppm model, then consume it
+                    model.encode(buffer.lookahead_buffer(0));
+                    buffer.shift_buffers(1);                    
+                }
+            }
+            else
+            {
+                // lookahead is empty after the refill attempt: signal the decoder that we are using ppm 
+                coder.encode(lz77_count,sum,sum);
+                
+                // the eof symbol is what makes decompress() stop and read the checksum
+                model.encode(eof_symbol);
+                // now write the checksum, most significant byte first, through the same model
+                unsigned long checksum = crc.get_checksum();
+                unsigned char byte1 = static_cast<unsigned char>((checksum>>24)&0xFF);
+                unsigned char byte2 = static_cast<unsigned char>((checksum>>16)&0xFF);
+                unsigned char byte3 = static_cast<unsigned char>((checksum>>8)&0xFF);
+                unsigned char byte4 = static_cast<unsigned char>((checksum)&0xFF);
+
+                model.encode(byte1);
+                model.encode(byte2);
+                model.encode(byte3);
+                model.encode(byte4);
+
+                break;      
+            }
+        } // while (true)        
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename fce,
+        typename fcd,
+        typename lz77_buffer,
+        typename sliding_buffer,
+        typename fce_length,
+        typename fcd_length,
+        typename fce_index,
+        typename fcd_index,
+        typename crc32
+        >
+    void compress_stream_kernel_2<fce,fcd,lz77_buffer,sliding_buffer,fce_length,fcd_length,fce_index,fcd_index,crc32>::
+    decompress (
+        std::istream& in_,
+        std::ostream& out_
+    ) const
+    {
+
+        std::streambuf& out = *out_.rdbuf();  // output is pushed directly into the stream buffer
+
+        typename fcd::entropy_decoder_type coder;
+        coder.set_stream(in_);
+
+        fcd model(coder);                // decodes literal bytes, the eof symbol and the checksum bytes
+        fcd_length model_length(coder);  // decodes lz77 match lengths
+        fcd_index model_index(coder);    // decodes lz77 match indexes
+
+        unsigned long symbol;
+        unsigned long count = 0;  // loop iterations since the last known-value marker was checked
+
+        sliding_buffer buffer;  // holds bytes already output so lz77 matches can be copied from it
+        buffer.set_size(15);  // same value (15) that compress() passes as its first lz77_buffer argument
+
+        // Initialize the buffer to all zeros.  There is no algorithmic reason to
+        // do this.  But doing so avoids a warning from valgrind so that is why
+        // I'm doing this.
+        for (unsigned long i = 0; i < buffer.size(); ++i)
+              buffer[i] = 0;
+
+        crc32 crc;  // accumulates a checksum over every byte written to out
+        
+        unsigned long lz77_count = 1;  // adaptive frequency of lz77 blocks; must track compress() exactly
+        unsigned long ppm_count = 1;   // adaptive frequency of ppm blocks; must track compress() exactly
+        bool next_block_lz77;  // block type decoded at the top of each iteration
+
+
+        // decode until we hit the eof marker symbol
+        while (true)
+        {
+            // every 20000 iterations compress() wrote a known value (150 of 400); make sure it is there
+            if (count == 20000)
+            {
+                if (coder.get_target(400) != 150)
+                {
+                    throw decompression_error("Error detected in compressed data stream.");
+                }
+                count = 0;
+                coder.decode(150,151);
+            }
+            ++count;
+
+
+            // compute the sum of ppm_count and lz77_count but make sure it is less than 65536,
+            // using the same halve-and-force-odd rescaling that compress() applies
+            unsigned long sum = ppm_count + lz77_count;
+            if (sum >= 65536)
+            {
+                ppm_count >>= 1;                    
+                lz77_count >>= 1;
+                ppm_count |= 1;
+                lz77_count |= 1;
+                sum = ppm_count+lz77_count;                    
+            }
+
+            // check if we are decoding a lz77 or ppm block: targets below lz77_count mean lz77
+            if (coder.get_target(sum) < lz77_count)
+            {
+                coder.decode(0,lz77_count);
+                next_block_lz77 = true;
+                ++lz77_count;
+            }
+            else
+            {
+                coder.decode(lz77_count,sum);
+                next_block_lz77 = false;
+                ++ppm_count;
+            }
+
+
+            if (next_block_lz77)
+            {
+                // NOTE(review): match_index and match_length come from the stream and are not range-checked here
+                unsigned long match_length, match_index;
+                // decode the match index
+                model_index.decode(match_index);
+
+                // decode the match length
+                model_length.decode(match_length);
+
+                // NOTE(review): presumably rotate_left(n) moves existing elements n indexes higher, hence the index adjustment - confirm
+                match_index += match_length;
+                buffer.rotate_left(match_length);
+                for (unsigned long i = 0; i < match_length; ++i)
+                {
+                    unsigned char ch = buffer[match_index-i];
+                    buffer[match_length-i-1] = ch;
+
+                    crc.add(ch);
+                    // write this ch to out; any sputc() result other than ch is treated as a write failure
+                    if (out.sputc(static_cast<char>(ch)) != static_cast<int>(ch))
+                    {
+                        throw std::ios::failure("error occurred in compress_stream_kernel_2::decompress");
+                    }
+                }
+                
+            }
+            else
+            {
+
+                // decode the next symbol: either a literal byte or the eof marker
+                model.decode(symbol);
+                if (symbol != eof_symbol)
+                {
+                    buffer.rotate_left(1);
+                    buffer[0] = static_cast<unsigned char>(symbol);
+                    
+
+                    crc.add(static_cast<unsigned char>(symbol));
+                    // write this symbol to out; any sputc() result other than symbol is treated as a write failure
+                    if (out.sputc(static_cast<char>(symbol)) != static_cast<int>(symbol))
+                    {
+                        throw std::ios::failure("error occurred in compress_stream_kernel_2::decompress");
+                    }
+                }
+                else
+                {
+                    // this was the eof marker symbol so we are done.  now check the checksum
+
+                    // the next four symbols are the checksum bytes, most significant first (as compress() wrote them)
+                    unsigned char byte1;
+                    unsigned char byte2;
+                    unsigned char byte3;
+                    unsigned char byte4;
+
+                    model.decode(symbol); byte1 = static_cast<unsigned char>(symbol);
+                    model.decode(symbol); byte2 = static_cast<unsigned char>(symbol);
+                    model.decode(symbol); byte3 = static_cast<unsigned char>(symbol);
+                    model.decode(symbol); byte4 = static_cast<unsigned char>(symbol);
+
+                    unsigned long checksum = byte1;
+                    checksum <<= 8;
+                    checksum |= byte2;
+                    checksum <<= 8;
+                    checksum |= byte3;
+                    checksum <<= 8;
+                    checksum |= byte4;
+
+                    if (checksum != crc.get_checksum())
+                        throw decompression_error("Error detected in compressed data stream.");
+
+                    break;
+                }
+            }
+
+        } // while (true)
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_COMPRESS_STREAM_KERNEl_2_
+
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-03-09 13:19:48 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-03-09 13:20:02 +0000
commit	58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree	96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/compress_stream/compress_stream_kernel_2.h
parent	Releasing debian version 1.43.2-1. (diff)
download	netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip