summaryrefslogtreecommitdiffstats
path: root/debian/vendor-h2o/deps/brotli/enc/encode.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/vendor-h2o/deps/brotli/enc/encode.cc')
-rw-r--r--debian/vendor-h2o/deps/brotli/enc/encode.cc958
1 files changed, 0 insertions, 958 deletions
diff --git a/debian/vendor-h2o/deps/brotli/enc/encode.cc b/debian/vendor-h2o/deps/brotli/enc/encode.cc
deleted file mode 100644
index ffd31e4..0000000
--- a/debian/vendor-h2o/deps/brotli/enc/encode.cc
+++ /dev/null
@@ -1,958 +0,0 @@
-/* Copyright 2013 Google Inc. All Rights Reserved.
-
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
-*/
-
-// Implementation of Brotli compressor.
-
-#include "./encode.h"
-
-#include <algorithm>
-#include <cstdlib>
-#include <cstring>
-#include <limits>
-
-#include "./backward_references.h"
-#include "./bit_cost.h"
-#include "./block_splitter.h"
-#include "./brotli_bit_stream.h"
-#include "./cluster.h"
-#include "./context.h"
-#include "./metablock.h"
-#include "./transform.h"
-#include "./compress_fragment.h"
-#include "./compress_fragment_two_pass.h"
-#include "./entropy_encode.h"
-#include "./fast_log.h"
-#include "./hash.h"
-#include "./histogram.h"
-#include "./prefix.h"
-#include "./utf8_util.h"
-#include "./write_bits.h"
-
-namespace brotli {
-
-static const int kMinQualityForBlockSplit = 4;
-static const int kMinQualityForContextModeling = 5;
-static const int kMinQualityForOptimizeHistograms = 4;
-// For quality 2 there is no block splitting, so we buffer at most this much
-// literals and commands.
-static const int kMaxNumDelayedSymbols = 0x2fff;
-
-#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
-
-void RecomputeDistancePrefixes(Command* cmds,
- size_t num_commands,
- uint32_t num_direct_distance_codes,
- uint32_t distance_postfix_bits) {
- if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
- return;
- }
- for (size_t i = 0; i < num_commands; ++i) {
- Command* cmd = &cmds[i];
- if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
- PrefixEncodeCopyDistance(cmd->DistanceCode(),
- num_direct_distance_codes,
- distance_postfix_bits,
- &cmd->dist_prefix_,
- &cmd->dist_extra_);
- }
- }
-}
-
-/* Wraps 64-bit input position to 32-bit ringbuffer position preserving
- "not-a-first-lap" feature. */
-uint32_t WrapPosition(uint64_t position) {
- uint32_t result = static_cast<uint32_t>(position);
- if (position > (1u << 30)) {
- result = (result & ((1u << 30) - 1)) | (1u << 30);
- }
- return result;
-}
-
-uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
- if (storage_size_ < size) {
- delete[] storage_;
- storage_ = new uint8_t[size];
- storage_size_ = size;
- }
- return storage_;
-}
-
-size_t MaxHashTableSize(int quality) {
- return quality == 0 ? 1 << 15 : 1 << 17;
-}
-
-size_t HashTableSize(size_t max_table_size, size_t input_size) {
- size_t htsize = 256;
- while (htsize < max_table_size && htsize < input_size) {
- htsize <<= 1;
- }
- return htsize;
-}
-
-int* BrotliCompressor::GetHashTable(int quality,
- size_t input_size,
- size_t* table_size) {
- // Use smaller hash table when input.size() is smaller, since we
- // fill the table, incurring O(hash table size) overhead for
- // compression, and if the input is short, we won't need that
- // many hash table entries anyway.
- const size_t max_table_size = MaxHashTableSize(quality);
- assert(max_table_size >= 256);
- size_t htsize = HashTableSize(max_table_size, input_size);
-
- int* table;
- if (htsize <= sizeof(small_table_) / sizeof(small_table_[0])) {
- table = small_table_;
- } else {
- if (large_table_ == NULL) {
- large_table_ = new int[max_table_size];
- }
- table = large_table_;
- }
-
- *table_size = htsize;
- memset(table, 0, htsize * sizeof(*table));
- return table;
-}
-
-void EncodeWindowBits(int lgwin, uint8_t* last_byte, uint8_t* last_byte_bits) {
- if (lgwin == 16) {
- *last_byte = 0;
- *last_byte_bits = 1;
- } else if (lgwin == 17) {
- *last_byte = 1;
- *last_byte_bits = 7;
- } else if (lgwin > 17) {
- *last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1);
- *last_byte_bits = 4;
- } else {
- *last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1);
- *last_byte_bits = 7;
- }
-}
-
-// Initializes the command and distance prefix codes for the first block.
-void InitCommandPrefixCodes(uint8_t cmd_depths[128],
- uint16_t cmd_bits[128],
- uint8_t cmd_code[512],
- size_t* cmd_code_numbits) {
- static const uint8_t kDefaultCommandDepths[128] = {
- 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
- 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
- 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
- 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
- 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- };
- static const uint16_t kDefaultCommandBits[128] = {
- 0, 0, 8, 9, 3, 35, 7, 71,
- 39, 103, 23, 47, 175, 111, 239, 31,
- 0, 0, 0, 4, 12, 2, 10, 6,
- 13, 29, 11, 43, 27, 59, 87, 55,
- 15, 79, 319, 831, 191, 703, 447, 959,
- 0, 14, 1, 25, 5, 21, 19, 51,
- 119, 159, 95, 223, 479, 991, 63, 575,
- 127, 639, 383, 895, 255, 767, 511, 1023,
- 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
- 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
- 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
- };
- COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
- COPY_ARRAY(cmd_bits, kDefaultCommandBits);
-
- // Initialize the pre-compressed form of the command and distance prefix
- // codes.
- static const uint8_t kDefaultCommandCode[] = {
- 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
- 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
- 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
- 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
- 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
- };
- static const int kDefaultCommandCodeNumBits = 448;
- COPY_ARRAY(cmd_code, kDefaultCommandCode);
- *cmd_code_numbits = kDefaultCommandCodeNumBits;
-}
-
-BrotliCompressor::BrotliCompressor(BrotliParams params)
- : params_(params),
- hashers_(new Hashers()),
- input_pos_(0),
- num_commands_(0),
- num_literals_(0),
- last_insert_len_(0),
- last_flush_pos_(0),
- last_processed_pos_(0),
- prev_byte_(0),
- prev_byte2_(0),
- storage_size_(0),
- storage_(0),
- large_table_(NULL),
- cmd_code_numbits_(0),
- command_buf_(NULL),
- literal_buf_(NULL) {
- // Sanitize params.
- params_.quality = std::max(0, params_.quality);
- if (params_.lgwin < kMinWindowBits) {
- params_.lgwin = kMinWindowBits;
- } else if (params_.lgwin > kMaxWindowBits) {
- params_.lgwin = kMaxWindowBits;
- }
- if (params_.quality <= 1) {
- params_.lgblock = params_.lgwin;
- } else if (params_.quality < kMinQualityForBlockSplit) {
- params_.lgblock = 14;
- } else if (params_.lgblock == 0) {
- params_.lgblock = 16;
- if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
- params_.lgblock = std::min(21, params_.lgwin);
- }
- } else {
- params_.lgblock = std::min(kMaxInputBlockBits,
- std::max(kMinInputBlockBits, params_.lgblock));
- }
-
- // Initialize input and literal cost ring buffers.
- // We allocate at least lgwin + 1 bits for the ring buffer so that the newly
- // added block fits there completely and we still get lgwin bits and at least
- // read_block_size_bits + 1 bits because the copy tail length needs to be
- // smaller than ringbuffer size.
- int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
- ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);
-
- commands_ = 0;
- cmd_alloc_size_ = 0;
-
- // Initialize last byte with stream header.
- EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_);
-
- // Initialize distance cache.
- dist_cache_[0] = 4;
- dist_cache_[1] = 11;
- dist_cache_[2] = 15;
- dist_cache_[3] = 16;
- // Save the state of the distance cache in case we need to restore it for
- // emitting an uncompressed block.
- memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
-
- if (params_.quality == 0) {
- InitCommandPrefixCodes(cmd_depths_, cmd_bits_,
- cmd_code_, &cmd_code_numbits_);
- } else if (params_.quality == 1) {
- command_buf_ = new uint32_t[kCompressFragmentTwoPassBlockSize];
- literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize];
- }
-
- // Initialize hashers.
- hash_type_ = std::min(10, params_.quality);
- hashers_->Init(hash_type_);
-}
-
-BrotliCompressor::~BrotliCompressor() {
- delete[] storage_;
- free(commands_);
- delete ringbuffer_;
- delete hashers_;
- delete[] large_table_;
- delete[] command_buf_;
- delete[] literal_buf_;
-}
-
-void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
- const uint8_t* input_buffer) {
- ringbuffer_->Write(input_buffer, input_size);
- input_pos_ += input_size;
-
- // TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
- // hashing not depend on uninitialized data. This makes compression
- // deterministic and it prevents uninitialized memory warnings in Valgrind.
- // Even without erasing, the output would be valid (but nondeterministic).
- //
- // Background information: The compressor stores short (at most 8 bytes)
- // substrings of the input already read in a hash table, and detects
- // repetitions by looking up such substrings in the hash table. If it
- // can find a substring, it checks whether the substring is really there
- // in the ring buffer (or it's just a hash collision). Should the hash
- // table become corrupt, this check makes sure that the output is
- // still valid, albeit the compression ratio would be bad.
- //
- // The compressor populates the hash table from the ring buffer as it's
- // reading new bytes from the input. However, at the last few indexes of
- // the ring buffer, there are not enough bytes to build full-length
- // substrings from. Since the hash table always contains full-length
- // substrings, we erase with dummy 0s here to make sure that those
- // substrings will contain 0s at the end instead of uninitialized
- // data.
- //
- // Please note that erasing is not necessary (because the
- // memory region is already initialized since he ring buffer
- // has a `tail' that holds a copy of the beginning,) so we
- // skip erasing if we have already gone around at least once in
- // the ring buffer.
- size_t pos = ringbuffer_->position();
- // Only clear during the first round of ringbuffer writes. On
- // subsequent rounds data in the ringbuffer would be affected.
- if (pos <= ringbuffer_->mask()) {
- // This is the first time when the ring buffer is being written.
- // We clear 7 bytes just after the bytes that have been copied from
- // the input buffer.
- //
- // The ringbuffer has a "tail" that holds a copy of the beginning,
- // but only once the ring buffer has been fully written once, i.e.,
- // pos <= mask. For the first time, we need to write values
- // in this tail (where index may be larger than mask), so that
- // we have exactly defined behavior and don't read un-initialized
- // memory. Due to performance reasons, hashing reads data using a
- // LOAD64, which can go 7 bytes beyond the bytes written in the
- // ringbuffer.
- memset(ringbuffer_->start() + pos, 0, 7);
- }
-}
-
-void BrotliCompressor::BrotliSetCustomDictionary(
- const size_t size, const uint8_t* dict) {
- CopyInputToRingBuffer(size, dict);
- last_flush_pos_ = size;
- last_processed_pos_ = size;
- if (size > 0) {
- prev_byte_ = dict[size - 1];
- }
- if (size > 1) {
- prev_byte2_ = dict[size - 2];
- }
- hashers_->PrependCustomDictionary(hash_type_, params_.lgwin, size, dict);
-}
-
-bool BrotliCompressor::WriteBrotliData(const bool is_last,
- const bool force_flush,
- size_t* out_size,
- uint8_t** output) {
- const uint64_t delta = input_pos_ - last_processed_pos_;
- const uint8_t* data = ringbuffer_->start();
- const uint32_t mask = ringbuffer_->mask();
-
- if (delta > input_block_size()) {
- return false;
- }
- const uint32_t bytes = static_cast<uint32_t>(delta);
-
- if (params_.quality <= 1) {
- if (delta == 0 && !is_last) {
- // We have no new input data and we don't have to finish the stream, so
- // nothing to do.
- *out_size = 0;
- return true;
- }
- const size_t max_out_size = 2 * bytes + 500;
- uint8_t* storage = GetBrotliStorage(max_out_size);
- storage[0] = last_byte_;
- size_t storage_ix = last_byte_bits_;
- size_t table_size;
- int* table = GetHashTable(params_.quality, bytes, &table_size);
- if (params_.quality == 0) {
- BrotliCompressFragmentFast(
- &data[WrapPosition(last_processed_pos_) & mask],
- bytes, is_last,
- table, table_size,
- cmd_depths_, cmd_bits_,
- &cmd_code_numbits_, cmd_code_,
- &storage_ix, storage);
- } else {
- BrotliCompressFragmentTwoPass(
- &data[WrapPosition(last_processed_pos_) & mask],
- bytes, is_last,
- command_buf_, literal_buf_,
- table, table_size,
- &storage_ix, storage);
- }
- last_byte_ = storage[storage_ix >> 3];
- last_byte_bits_ = storage_ix & 7u;
- last_processed_pos_ = input_pos_;
- *output = &storage[0];
- *out_size = storage_ix >> 3;
- return true;
- }
-
- // Theoretical max number of commands is 1 per 2 bytes.
- size_t newsize = num_commands_ + bytes / 2 + 1;
- if (newsize > cmd_alloc_size_) {
- // Reserve a bit more memory to allow merging with a next block
- // without realloc: that would impact speed.
- newsize += (bytes / 4) + 16;
- cmd_alloc_size_ = newsize;
- commands_ =
- static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize));
- }
-
- CreateBackwardReferences(bytes, WrapPosition(last_processed_pos_),
- is_last, data, mask,
- params_.quality,
- params_.lgwin,
- hashers_,
- hash_type_,
- dist_cache_,
- &last_insert_len_,
- &commands_[num_commands_],
- &num_commands_,
- &num_literals_);
-
- size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
- if (!is_last && !force_flush &&
- (params_.quality >= kMinQualityForBlockSplit ||
- (num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
- input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
- // Merge with next input block. Everything will happen later.
- last_processed_pos_ = input_pos_;
- *out_size = 0;
- return true;
- }
-
- // Create the last insert-only command.
- if (last_insert_len_ > 0) {
- brotli::Command cmd(last_insert_len_);
- commands_[num_commands_++] = cmd;
- num_literals_ += last_insert_len_;
- last_insert_len_ = 0;
- }
-
- WriteMetaBlockInternal(is_last, out_size, output);
- return true;
-}
-
-// Decide about the context map based on the ability of the prediction
-// ability of the previous byte UTF8-prefix on the next byte. The
-// prediction ability is calculated as shannon entropy. Here we need
-// shannon entropy instead of 'BitsEntropy' since the prefix will be
-// encoded with the remaining 6 bits of the following byte, and
-// BitsEntropy will assume that symbol to be stored alone using Huffman
-// coding.
-void ChooseContextMap(int quality,
- uint32_t* bigram_histo,
- size_t* num_literal_contexts,
- const uint32_t** literal_context_map) {
- uint32_t monogram_histo[3] = { 0 };
- uint32_t two_prefix_histo[6] = { 0 };
- size_t total = 0;
- for (size_t i = 0; i < 9; ++i) {
- total += bigram_histo[i];
- monogram_histo[i % 3] += bigram_histo[i];
- size_t j = i;
- if (j >= 6) {
- j -= 6;
- }
- two_prefix_histo[j] += bigram_histo[i];
- }
- size_t dummy;
- double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
- double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
- ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
- double entropy3 = 0;
- for (size_t k = 0; k < 3; ++k) {
- entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
- }
-
- assert(total != 0);
- double scale = 1.0 / static_cast<double>(total);
- entropy1 *= scale;
- entropy2 *= scale;
- entropy3 *= scale;
-
- static const uint32_t kStaticContextMapContinuation[64] = {
- 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
- static const uint32_t kStaticContextMapSimpleUTF8[64] = {
- 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
- if (quality < 7) {
- // 3 context models is a bit slower, don't use it at lower qualities.
- entropy3 = entropy1 * 10;
- }
- // If expected savings by symbol are less than 0.2 bits, skip the
- // context modeling -- in exchange for faster decoding speed.
- if (entropy1 - entropy2 < 0.2 &&
- entropy1 - entropy3 < 0.2) {
- *num_literal_contexts = 1;
- } else if (entropy2 - entropy3 < 0.02) {
- *num_literal_contexts = 2;
- *literal_context_map = kStaticContextMapSimpleUTF8;
- } else {
- *num_literal_contexts = 3;
- *literal_context_map = kStaticContextMapContinuation;
- }
-}
-
-void DecideOverLiteralContextModeling(const uint8_t* input,
- size_t start_pos,
- size_t length,
- size_t mask,
- int quality,
- ContextType* literal_context_mode,
- size_t* num_literal_contexts,
- const uint32_t** literal_context_map) {
- if (quality < kMinQualityForContextModeling || length < 64) {
- return;
- }
- // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
- // UTF8 data faster we only examine 64 byte long strides at every 4kB
- // intervals.
- const size_t end_pos = start_pos + length;
- uint32_t bigram_prefix_histo[9] = { 0 };
- for (; start_pos + 64 <= end_pos; start_pos += 4096) {
- static const int lut[4] = { 0, 0, 1, 2 };
- const size_t stride_end_pos = start_pos + 64;
- int prev = lut[input[start_pos & mask] >> 6] * 3;
- for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
- const uint8_t literal = input[pos & mask];
- ++bigram_prefix_histo[prev + lut[literal >> 6]];
- prev = lut[literal >> 6] * 3;
- }
- }
- *literal_context_mode = CONTEXT_UTF8;
- ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
- literal_context_map);
-}
-
-void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
- size_t* out_size,
- uint8_t** output) {
- if (!is_last && input_pos_ == last_flush_pos_) {
- // We have no new input data and we don't have to finish the stream, so
- // nothing to do.
- *out_size = 0;
- return;
- }
- assert(input_pos_ >= last_flush_pos_);
- assert(input_pos_ > last_flush_pos_ || is_last);
- assert(input_pos_ - last_flush_pos_ <= 1u << 24);
- const uint32_t bytes = static_cast<uint32_t>(input_pos_ - last_flush_pos_);
- const uint8_t* data = ringbuffer_->start();
- const uint32_t mask = ringbuffer_->mask();
- const size_t max_out_size = 2 * bytes + 500;
- uint8_t* storage = GetBrotliStorage(max_out_size);
- storage[0] = last_byte_;
- size_t storage_ix = last_byte_bits_;
-
- bool uncompressed = false;
- if (num_commands_ < (bytes >> 8) + 2) {
- if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
- uint32_t literal_histo[256] = { 0 };
- static const uint32_t kSampleRate = 13;
- static const double kMinEntropy = 7.92;
- const double bit_cost_threshold =
- static_cast<double>(bytes) * kMinEntropy / kSampleRate;
- size_t t = (bytes + kSampleRate - 1) / kSampleRate;
- uint32_t pos = static_cast<uint32_t>(last_flush_pos_);
- for (size_t i = 0; i < t; i++) {
- ++literal_histo[data[pos & mask]];
- pos += kSampleRate;
- }
- if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
- uncompressed = true;
- }
- }
- }
-
- if (bytes == 0) {
- // Write the ISLAST and ISEMPTY bits.
- WriteBits(2, 3, &storage_ix, &storage[0]);
- storage_ix = (storage_ix + 7u) & ~7u;
- } else if (uncompressed) {
- // Restore the distance cache, as its last update by
- // CreateBackwardReferences is now unused.
- memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
- StoreUncompressedMetaBlock(is_last, data,
- WrapPosition(last_flush_pos_), mask, bytes,
- &storage_ix,
- &storage[0]);
- } else {
- uint32_t num_direct_distance_codes = 0;
- uint32_t distance_postfix_bits = 0;
- if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
- num_direct_distance_codes = 12;
- distance_postfix_bits = 1;
- RecomputeDistancePrefixes(commands_,
- num_commands_,
- num_direct_distance_codes,
- distance_postfix_bits);
- }
- if (params_.quality == 2) {
- StoreMetaBlockFast(data, WrapPosition(last_flush_pos_),
- bytes, mask, is_last,
- commands_, num_commands_,
- &storage_ix,
- &storage[0]);
- } else if (params_.quality < kMinQualityForBlockSplit) {
- StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos_),
- bytes, mask, is_last,
- commands_, num_commands_,
- &storage_ix,
- &storage[0]);
- } else {
- MetaBlockSplit mb;
- ContextType literal_context_mode = CONTEXT_UTF8;
- if (params_.quality <= 9) {
- size_t num_literal_contexts = 1;
- const uint32_t* literal_context_map = NULL;
- DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos_),
- bytes, mask,
- params_.quality,
- &literal_context_mode,
- &num_literal_contexts,
- &literal_context_map);
- if (literal_context_map == NULL) {
- BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos_), mask,
- commands_, num_commands_,
- &mb);
- } else {
- BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos_),
- mask,
- prev_byte_, prev_byte2_,
- literal_context_mode,
- num_literal_contexts,
- literal_context_map,
- commands_, num_commands_,
- &mb);
- }
- } else {
- if (!IsMostlyUTF8(
- data, WrapPosition(last_flush_pos_), mask, bytes, kMinUTF8Ratio)) {
- literal_context_mode = CONTEXT_SIGNED;
- }
- BuildMetaBlock(data, WrapPosition(last_flush_pos_), mask,
- prev_byte_, prev_byte2_,
- commands_, num_commands_,
- literal_context_mode,
- &mb);
- }
- if (params_.quality >= kMinQualityForOptimizeHistograms) {
- OptimizeHistograms(num_direct_distance_codes,
- distance_postfix_bits,
- &mb);
- }
- StoreMetaBlock(data, WrapPosition(last_flush_pos_), bytes, mask,
- prev_byte_, prev_byte2_,
- is_last,
- num_direct_distance_codes,
- distance_postfix_bits,
- literal_context_mode,
- commands_, num_commands_,
- mb,
- &storage_ix,
- &storage[0]);
- }
- if (bytes + 4 < (storage_ix >> 3)) {
- // Restore the distance cache and last byte.
- memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
- storage[0] = last_byte_;
- storage_ix = last_byte_bits_;
- StoreUncompressedMetaBlock(is_last, data,
- WrapPosition(last_flush_pos_), mask,
- bytes, &storage_ix, &storage[0]);
- }
- }
- last_byte_ = storage[storage_ix >> 3];
- last_byte_bits_ = storage_ix & 7u;
- last_flush_pos_ = input_pos_;
- last_processed_pos_ = input_pos_;
- prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
- prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
- num_commands_ = 0;
- num_literals_ = 0;
- // Save the state of the distance cache in case we need to restore it for
- // emitting an uncompressed block.
- memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
- *output = &storage[0];
- *out_size = storage_ix >> 3;
-}
-
-bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
- const uint8_t* input_buffer,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
- CopyInputToRingBuffer(input_size, input_buffer);
- size_t out_size = 0;
- uint8_t* output;
- if (!WriteBrotliData(is_last, /* force_flush = */ true, &out_size, &output) ||
- out_size > *encoded_size) {
- return false;
- }
- if (out_size > 0) {
- memcpy(encoded_buffer, output, out_size);
- }
- *encoded_size = out_size;
- return true;
-}
-
-bool BrotliCompressor::WriteMetadata(const size_t input_size,
- const uint8_t* input_buffer,
- const bool is_last,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
- if (input_size > (1 << 24) || input_size + 6 > *encoded_size) {
- return false;
- }
- uint64_t hdr_buffer_data[2];
- uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]);
- size_t storage_ix = last_byte_bits_;
- hdr_buffer[0] = last_byte_;
- WriteBits(1, 0, &storage_ix, hdr_buffer);
- WriteBits(2, 3, &storage_ix, hdr_buffer);
- WriteBits(1, 0, &storage_ix, hdr_buffer);
- if (input_size == 0) {
- WriteBits(2, 0, &storage_ix, hdr_buffer);
- *encoded_size = (storage_ix + 7u) >> 3;
- memcpy(encoded_buffer, hdr_buffer, *encoded_size);
- } else {
- uint32_t nbits = (input_size == 1) ? 0 : (Log2FloorNonZero(
- static_cast<uint32_t>(input_size) - 1) + 1);
- uint32_t nbytes = (nbits + 7) / 8;
- WriteBits(2, nbytes, &storage_ix, hdr_buffer);
- WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
- size_t hdr_size = (storage_ix + 7u) >> 3;
- memcpy(encoded_buffer, hdr_buffer, hdr_size);
- memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
- *encoded_size = hdr_size + input_size;
- }
- if (is_last) {
- encoded_buffer[(*encoded_size)++] = 3;
- }
- last_byte_ = 0;
- last_byte_bits_ = 0;
- return true;
-}
-
-bool BrotliCompressor::FinishStream(
- size_t* encoded_size, uint8_t* encoded_buffer) {
- return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
-}
-
-int BrotliCompressBuffer(BrotliParams params,
- size_t input_size,
- const uint8_t* input_buffer,
- size_t* encoded_size,
- uint8_t* encoded_buffer) {
- if (*encoded_size == 0) {
- // Output buffer needs at least one byte.
- return 0;
- }
- BrotliMemIn in(input_buffer, input_size);
- BrotliMemOut out(encoded_buffer, *encoded_size);
- if (!BrotliCompress(params, &in, &out)) {
- return 0;
- }
- *encoded_size = out.position();
- return 1;
-}
-
-bool BrotliInIsFinished(BrotliIn* r) {
- size_t read_bytes;
- return r->Read(0, &read_bytes) == NULL;
-}
-
-const uint8_t* BrotliInReadAndCheckEnd(const size_t block_size,
- BrotliIn* r,
- size_t* bytes_read,
- bool* is_last) {
- *bytes_read = 0;
- const uint8_t* data = reinterpret_cast<const uint8_t*>(
- r->Read(block_size, bytes_read));
- assert((data == NULL) == (*bytes_read == 0));
- *is_last = BrotliInIsFinished(r);
- return data;
-}
-
-bool CopyOneBlockToRingBuffer(BrotliIn* r,
- BrotliCompressor* compressor,
- size_t* bytes_read,
- bool* is_last) {
- const size_t block_size = compressor->input_block_size();
- const uint8_t* data = BrotliInReadAndCheckEnd(block_size, r,
- bytes_read, is_last);
- if (data == NULL) {
- return *is_last;
- }
- compressor->CopyInputToRingBuffer(*bytes_read, data);
-
- // Read more bytes until block_size is filled or an EOF (data == NULL) is
- // received. This is useful to get deterministic compressed output for the
- // same input no matter how r->Read splits the input to chunks.
- for (size_t remaining = block_size - *bytes_read; remaining > 0; ) {
- size_t more_bytes_read = 0;
- data = BrotliInReadAndCheckEnd(remaining, r, &more_bytes_read, is_last);
- if (data == NULL) {
- return *is_last;
- }
- compressor->CopyInputToRingBuffer(more_bytes_read, data);
- *bytes_read += more_bytes_read;
- remaining -= more_bytes_read;
- }
- return true;
-}
-
-
-int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
- return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
-}
-
-// Reads the provided input in 'block_size' blocks. Only the last read can be
-// smaller than 'block_size'.
-class BrotliBlockReader {
- public:
- explicit BrotliBlockReader(size_t block_size)
- : block_size_(block_size), buf_(NULL) {}
- ~BrotliBlockReader() { delete[] buf_; }
-
- const uint8_t* Read(BrotliIn* in, size_t* bytes_read, bool* is_last) {
- *bytes_read = 0;
- const uint8_t* data = BrotliInReadAndCheckEnd(block_size_, in,
- bytes_read, is_last);
- if (data == NULL || *bytes_read == block_size_ || *is_last) {
- // If we could get the whole block in one read, or it is the last block,
- // we just return the pointer to the data without copying.
- return data;
- }
- // If the data comes in smaller chunks, we need to copy it into an internal
- // buffer until we get a whole block or reach the last chunk.
- if (buf_ == NULL) {
- buf_ = new uint8_t[block_size_];
- }
- memcpy(buf_, data, *bytes_read);
- do {
- size_t cur_bytes_read = 0;
- data = BrotliInReadAndCheckEnd(block_size_ - *bytes_read, in,
- &cur_bytes_read, is_last);
- if (data == NULL) {
- return *is_last ? buf_ : NULL;
- }
- memcpy(&buf_[*bytes_read], data, cur_bytes_read);
- *bytes_read += cur_bytes_read;
- } while (*bytes_read < block_size_ && !*is_last);
- return buf_;
- }
-
- private:
- const size_t block_size_;
- uint8_t* buf_;
-};
-
-int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
- BrotliParams params,
- BrotliIn* in, BrotliOut* out) {
- if (params.quality <= 1) {
- const int quality = std::max(0, params.quality);
- const int lgwin = std::min(kMaxWindowBits,
- std::max(kMinWindowBits, params.lgwin));
- uint8_t* storage = NULL;
- int* table = NULL;
- uint32_t* command_buf = NULL;
- uint8_t* literal_buf = NULL;
- uint8_t cmd_depths[128];
- uint16_t cmd_bits[128];
- uint8_t cmd_code[512];
- size_t cmd_code_numbits;
- if (quality == 0) {
- InitCommandPrefixCodes(cmd_depths, cmd_bits, cmd_code, &cmd_code_numbits);
- }
- uint8_t last_byte;
- uint8_t last_byte_bits;
- EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
- BrotliBlockReader r(1u << lgwin);
- int ok = 1;
- bool is_last = false;
- while (ok && !is_last) {
- // Read next block of input.
- size_t bytes;
- const uint8_t* data = r.Read(in, &bytes, &is_last);
- if (data == NULL) {
- if (!is_last) {
- ok = 0;
- break;
- }
- assert(bytes == 0);
- }
- // Set up output storage.
- const size_t max_out_size = 2 * bytes + 500;
- if (storage == NULL) {
- storage = new uint8_t[max_out_size];
- }
- storage[0] = last_byte;
- size_t storage_ix = last_byte_bits;
- // Set up hash table.
- size_t htsize = HashTableSize(MaxHashTableSize(quality), bytes);
- if (table == NULL) {
- table = new int[htsize];
- }
- memset(table, 0, htsize * sizeof(table[0]));
- // Set up command and literal buffers for two pass mode.
- if (quality == 1 && command_buf == NULL) {
- size_t buf_size = std::min(bytes, kCompressFragmentTwoPassBlockSize);
- command_buf = new uint32_t[buf_size];
- literal_buf = new uint8_t[buf_size];
- }
- // Do the actual compression.
- if (quality == 0) {
- BrotliCompressFragmentFast(data, bytes, is_last, table, htsize,
- cmd_depths, cmd_bits,
- &cmd_code_numbits, cmd_code,
- &storage_ix, storage);
- } else {
- BrotliCompressFragmentTwoPass(data, bytes, is_last,
- command_buf, literal_buf,
- table, htsize,
- &storage_ix, storage);
- }
- // Save last bytes to stitch it together with the next output block.
- last_byte = storage[storage_ix >> 3];
- last_byte_bits = storage_ix & 7u;
- // Write output block.
- size_t out_bytes = storage_ix >> 3;
- if (out_bytes > 0 && !out->Write(storage, out_bytes)) {
- ok = 0;
- break;
- }
- }
- delete[] storage;
- delete[] table;
- delete[] command_buf;
- delete[] literal_buf;
- return ok;
- }
-
- size_t in_bytes = 0;
- size_t out_bytes = 0;
- uint8_t* output;
- bool final_block = false;
- BrotliCompressor compressor(params);
- if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
- while (!final_block) {
- if (!CopyOneBlockToRingBuffer(in, &compressor, &in_bytes, &final_block)) {
- return false;
- }
- out_bytes = 0;
- if (!compressor.WriteBrotliData(final_block,
- /* force_flush = */ false,
- &out_bytes, &output)) {
- return false;
- }
- if (out_bytes > 0 && !out->Write(output, out_bytes)) {
- return false;
- }
- }
- return true;
-}
-
-
-} // namespace brotli