diff options
Diffstat (limited to 'web/server/h2o/libh2o/deps/brotli/enc/metablock.cc')
-rw-r--r-- | web/server/h2o/libh2o/deps/brotli/enc/metablock.cc | 534 |
1 files changed, 0 insertions, 534 deletions
diff --git a/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc b/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc deleted file mode 100644 index d6ea8426f..000000000 --- a/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc +++ /dev/null @@ -1,534 +0,0 @@ -/* Copyright 2015 Google Inc. All Rights Reserved. - - Distributed under MIT license. - See file LICENSE for detail or copy at https://opensource.org/licenses/MIT -*/ - -// Algorithms for distributing the literals and commands of a metablock between -// block types and contexts. - -#include "./metablock.h" - -#include "./block_splitter.h" -#include "./context.h" -#include "./cluster.h" -#include "./histogram.h" - -namespace brotli { - -void BuildMetaBlock(const uint8_t* ringbuffer, - const size_t pos, - const size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - const Command* cmds, - size_t num_commands, - ContextType literal_context_mode, - MetaBlockSplit* mb) { - SplitBlock(cmds, num_commands, - ringbuffer, pos, mask, - &mb->literal_split, - &mb->command_split, - &mb->distance_split); - - std::vector<ContextType> literal_context_modes(mb->literal_split.num_types, - literal_context_mode); - - size_t num_literal_contexts = - mb->literal_split.num_types << kLiteralContextBits; - size_t num_distance_contexts = - mb->distance_split.num_types << kDistanceContextBits; - std::vector<HistogramLiteral> literal_histograms(num_literal_contexts); - mb->command_histograms.resize(mb->command_split.num_types); - std::vector<HistogramDistance> distance_histograms(num_distance_contexts); - BuildHistograms(cmds, num_commands, - mb->literal_split, - mb->command_split, - mb->distance_split, - ringbuffer, - pos, - mask, - prev_byte, - prev_byte2, - literal_context_modes, - &literal_histograms, - &mb->command_histograms, - &distance_histograms); - - // Histogram ids need to fit in one byte. - static const size_t kMaxNumberOfHistograms = 256; - - ClusterHistograms(literal_histograms, - 1u << kLiteralContextBits, - mb->literal_split.num_types, - kMaxNumberOfHistograms, - &mb->literal_histograms, - &mb->literal_context_map); - - ClusterHistograms(distance_histograms, - 1u << kDistanceContextBits, - mb->distance_split.num_types, - kMaxNumberOfHistograms, - &mb->distance_histograms, - &mb->distance_context_map); -} - -// Greedy block splitter for one block category (literal, command or distance). -template<typename HistogramType> -class BlockSplitter { - public: - BlockSplitter(size_t alphabet_size, - size_t min_block_size, - double split_threshold, - size_t num_symbols, - BlockSplit* split, - std::vector<HistogramType>* histograms) - : alphabet_size_(alphabet_size), - min_block_size_(min_block_size), - split_threshold_(split_threshold), - num_blocks_(0), - split_(split), - histograms_(histograms), - target_block_size_(min_block_size), - block_size_(0), - curr_histogram_ix_(0), - merge_last_count_(0) { - size_t max_num_blocks = num_symbols / min_block_size + 1; - // We have to allocate one more histogram than the maximum number of block - // types for the current histogram when the meta-block is too big. - size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1); - split_->lengths.resize(max_num_blocks); - split_->types.resize(max_num_blocks); - histograms_->resize(max_num_types); - last_histogram_ix_[0] = last_histogram_ix_[1] = 0; - } - - // Adds the next symbol to the current histogram. When the current histogram - // reaches the target size, decides on merging the block. - void AddSymbol(size_t symbol) { - (*histograms_)[curr_histogram_ix_].Add(symbol); - ++block_size_; - if (block_size_ == target_block_size_) { - FinishBlock(/* is_final = */ false); - } - } - - // Does either of three things: - // (1) emits the current block with a new block type; - // (2) emits the current block with the type of the second last block; - // (3) merges the current block with the last block. - void FinishBlock(bool is_final) { - if (block_size_ < min_block_size_) { - block_size_ = min_block_size_; - } - if (num_blocks_ == 0) { - // Create first block. - split_->lengths[0] = static_cast<uint32_t>(block_size_); - split_->types[0] = 0; - last_entropy_[0] = - BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_); - last_entropy_[1] = last_entropy_[0]; - ++num_blocks_; - ++split_->num_types; - ++curr_histogram_ix_; - block_size_ = 0; - } else if (block_size_ > 0) { - double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0], - alphabet_size_); - HistogramType combined_histo[2]; - double combined_entropy[2]; - double diff[2]; - for (size_t j = 0; j < 2; ++j) { - size_t last_histogram_ix = last_histogram_ix_[j]; - combined_histo[j] = (*histograms_)[curr_histogram_ix_]; - combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]); - combined_entropy[j] = BitsEntropy( - &combined_histo[j].data_[0], alphabet_size_); - diff[j] = combined_entropy[j] - entropy - last_entropy_[j]; - } - - if (split_->num_types < kMaxBlockTypes && - diff[0] > split_threshold_ && - diff[1] > split_threshold_) { - // Create new block. - split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_); - split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types); - last_histogram_ix_[1] = last_histogram_ix_[0]; - last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types); - last_entropy_[1] = last_entropy_[0]; - last_entropy_[0] = entropy; - ++num_blocks_; - ++split_->num_types; - ++curr_histogram_ix_; - block_size_ = 0; - merge_last_count_ = 0; - target_block_size_ = min_block_size_; - } else if (diff[1] < diff[0] - 20.0) { - // Combine this block with second last block. - split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_); - split_->types[num_blocks_] = split_->types[num_blocks_ - 2]; - std::swap(last_histogram_ix_[0], last_histogram_ix_[1]); - (*histograms_)[last_histogram_ix_[0]] = combined_histo[1]; - last_entropy_[1] = last_entropy_[0]; - last_entropy_[0] = combined_entropy[1]; - ++num_blocks_; - block_size_ = 0; - (*histograms_)[curr_histogram_ix_].Clear(); - merge_last_count_ = 0; - target_block_size_ = min_block_size_; - } else { - // Combine this block with last block. - split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_); - (*histograms_)[last_histogram_ix_[0]] = combined_histo[0]; - last_entropy_[0] = combined_entropy[0]; - if (split_->num_types == 1) { - last_entropy_[1] = last_entropy_[0]; - } - block_size_ = 0; - (*histograms_)[curr_histogram_ix_].Clear(); - if (++merge_last_count_ > 1) { - target_block_size_ += min_block_size_; - } - } - } - if (is_final) { - (*histograms_).resize(split_->num_types); - split_->types.resize(num_blocks_); - split_->lengths.resize(num_blocks_); - } - } - - private: - static const uint16_t kMaxBlockTypes = 256; - - // Alphabet size of particular block category. - const size_t alphabet_size_; - // We collect at least this many symbols for each block. - const size_t min_block_size_; - // We merge histograms A and B if - // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, - // where A is the current histogram and B is the histogram of the last or the - // second last block type. - const double split_threshold_; - - size_t num_blocks_; - BlockSplit* split_; // not owned - std::vector<HistogramType>* histograms_; // not owned - - // The number of symbols that we want to collect before deciding on whether - // or not to merge the block with a previous one or emit a new block. - size_t target_block_size_; - // The number of symbols in the current histogram. - size_t block_size_; - // Offset of the current histogram. - size_t curr_histogram_ix_; - // Offset of the histograms of the previous two block types. - size_t last_histogram_ix_[2]; - // Entropy of the previous two block types. - double last_entropy_[2]; - // The number of times we merged the current block with the last one. - size_t merge_last_count_; -}; - -void BuildMetaBlockGreedy(const uint8_t* ringbuffer, - size_t pos, - size_t mask, - const Command *commands, - size_t n_commands, - MetaBlockSplit* mb) { - size_t num_literals = 0; - for (size_t i = 0; i < n_commands; ++i) { - num_literals += commands[i].insert_len_; - } - - BlockSplitter<HistogramLiteral> lit_blocks( - 256, 512, 400.0, num_literals, - &mb->literal_split, &mb->literal_histograms); - BlockSplitter<HistogramCommand> cmd_blocks( - kNumCommandPrefixes, 1024, 500.0, n_commands, - &mb->command_split, &mb->command_histograms); - BlockSplitter<HistogramDistance> dist_blocks( - 64, 512, 100.0, n_commands, - &mb->distance_split, &mb->distance_histograms); - - for (size_t i = 0; i < n_commands; ++i) { - const Command cmd = commands[i]; - cmd_blocks.AddSymbol(cmd.cmd_prefix_); - for (size_t j = cmd.insert_len_; j != 0; --j) { - lit_blocks.AddSymbol(ringbuffer[pos & mask]); - ++pos; - } - pos += cmd.copy_len_; - if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) { - dist_blocks.AddSymbol(cmd.dist_prefix_); - } - } - - lit_blocks.FinishBlock(/* is_final = */ true); - cmd_blocks.FinishBlock(/* is_final = */ true); - dist_blocks.FinishBlock(/* is_final = */ true); -} - -// Greedy block splitter for one block category (literal, command or distance). -// Gathers histograms for all context buckets. -template<typename HistogramType> -class ContextBlockSplitter { - public: - ContextBlockSplitter(size_t alphabet_size, - size_t num_contexts, - size_t min_block_size, - double split_threshold, - size_t num_symbols, - BlockSplit* split, - std::vector<HistogramType>* histograms) - : alphabet_size_(alphabet_size), - num_contexts_(num_contexts), - max_block_types_(kMaxBlockTypes / num_contexts), - min_block_size_(min_block_size), - split_threshold_(split_threshold), - num_blocks_(0), - split_(split), - histograms_(histograms), - target_block_size_(min_block_size), - block_size_(0), - curr_histogram_ix_(0), - last_entropy_(2 * num_contexts), - merge_last_count_(0) { - size_t max_num_blocks = num_symbols / min_block_size + 1; - // We have to allocate one more histogram than the maximum number of block - // types for the current histogram when the meta-block is too big. - size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1); - split_->lengths.resize(max_num_blocks); - split_->types.resize(max_num_blocks); - histograms_->resize(max_num_types * num_contexts); - last_histogram_ix_[0] = last_histogram_ix_[1] = 0; - } - - // Adds the next symbol to the current block type and context. When the - // current block reaches the target size, decides on merging the block. - void AddSymbol(size_t symbol, size_t context) { - (*histograms_)[curr_histogram_ix_ + context].Add(symbol); - ++block_size_; - if (block_size_ == target_block_size_) { - FinishBlock(/* is_final = */ false); - } - } - - // Does either of three things: - // (1) emits the current block with a new block type; - // (2) emits the current block with the type of the second last block; - // (3) merges the current block with the last block. - void FinishBlock(bool is_final) { - if (block_size_ < min_block_size_) { - block_size_ = min_block_size_; - } - if (num_blocks_ == 0) { - // Create first block. - split_->lengths[0] = static_cast<uint32_t>(block_size_); - split_->types[0] = 0; - for (size_t i = 0; i < num_contexts_; ++i) { - last_entropy_[i] = - BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_); - last_entropy_[num_contexts_ + i] = last_entropy_[i]; - } - ++num_blocks_; - ++split_->num_types; - curr_histogram_ix_ += num_contexts_; - block_size_ = 0; - } else if (block_size_ > 0) { - // Try merging the set of histograms for the current block type with the - // respective set of histograms for the last and second last block types. - // Decide over the split based on the total reduction of entropy across - // all contexts. - std::vector<double> entropy(num_contexts_); - std::vector<HistogramType> combined_histo(2 * num_contexts_); - std::vector<double> combined_entropy(2 * num_contexts_); - double diff[2] = { 0.0 }; - for (size_t i = 0; i < num_contexts_; ++i) { - size_t curr_histo_ix = curr_histogram_ix_ + i; - entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0], - alphabet_size_); - for (size_t j = 0; j < 2; ++j) { - size_t jx = j * num_contexts_ + i; - size_t last_histogram_ix = last_histogram_ix_[j] + i; - combined_histo[jx] = (*histograms_)[curr_histo_ix]; - combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]); - combined_entropy[jx] = BitsEntropy( - &combined_histo[jx].data_[0], alphabet_size_); - diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx]; - } - } - - if (split_->num_types < max_block_types_ && - diff[0] > split_threshold_ && - diff[1] > split_threshold_) { - // Create new block. - split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_); - split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types); - last_histogram_ix_[1] = last_histogram_ix_[0]; - last_histogram_ix_[0] = split_->num_types * num_contexts_; - for (size_t i = 0; i < num_contexts_; ++i) { - last_entropy_[num_contexts_ + i] = last_entropy_[i]; - last_entropy_[i] = entropy[i]; - } - ++num_blocks_; - ++split_->num_types; - curr_histogram_ix_ += num_contexts_; - block_size_ = 0; - merge_last_count_ = 0; - target_block_size_ = min_block_size_; - } else if (diff[1] < diff[0] - 20.0) { - // Combine this block with second last block. - split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_); - split_->types[num_blocks_] = split_->types[num_blocks_ - 2]; - std::swap(last_histogram_ix_[0], last_histogram_ix_[1]); - for (size_t i = 0; i < num_contexts_; ++i) { - (*histograms_)[last_histogram_ix_[0] + i] = - combined_histo[num_contexts_ + i]; - last_entropy_[num_contexts_ + i] = last_entropy_[i]; - last_entropy_[i] = combined_entropy[num_contexts_ + i]; - (*histograms_)[curr_histogram_ix_ + i].Clear(); - } - ++num_blocks_; - block_size_ = 0; - merge_last_count_ = 0; - target_block_size_ = min_block_size_; - } else { - // Combine this block with last block. - split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_); - for (size_t i = 0; i < num_contexts_; ++i) { - (*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i]; - last_entropy_[i] = combined_entropy[i]; - if (split_->num_types == 1) { - last_entropy_[num_contexts_ + i] = last_entropy_[i]; - } - (*histograms_)[curr_histogram_ix_ + i].Clear(); - } - block_size_ = 0; - if (++merge_last_count_ > 1) { - target_block_size_ += min_block_size_; - } - } - } - if (is_final) { - (*histograms_).resize(split_->num_types * num_contexts_); - split_->types.resize(num_blocks_); - split_->lengths.resize(num_blocks_); - } - } - - private: - static const int kMaxBlockTypes = 256; - - // Alphabet size of particular block category. - const size_t alphabet_size_; - const size_t num_contexts_; - const size_t max_block_types_; - // We collect at least this many symbols for each block. - const size_t min_block_size_; - // We merge histograms A and B if - // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, - // where A is the current histogram and B is the histogram of the last or the - // second last block type. - const double split_threshold_; - - size_t num_blocks_; - BlockSplit* split_; // not owned - std::vector<HistogramType>* histograms_; // not owned - - // The number of symbols that we want to collect before deciding on whether - // or not to merge the block with a previous one or emit a new block. - size_t target_block_size_; - // The number of symbols in the current histogram. - size_t block_size_; - // Offset of the current histogram. - size_t curr_histogram_ix_; - // Offset of the histograms of the previous two block types. - size_t last_histogram_ix_[2]; - // Entropy of the previous two block types. - std::vector<double> last_entropy_; - // The number of times we merged the current block with the last one. - size_t merge_last_count_; -}; - -void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer, - size_t pos, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - ContextType literal_context_mode, - size_t num_contexts, - const uint32_t* static_context_map, - const Command *commands, - size_t n_commands, - MetaBlockSplit* mb) { - size_t num_literals = 0; - for (size_t i = 0; i < n_commands; ++i) { - num_literals += commands[i].insert_len_; - } - - ContextBlockSplitter<HistogramLiteral> lit_blocks( - 256, num_contexts, 512, 400.0, num_literals, - &mb->literal_split, &mb->literal_histograms); - BlockSplitter<HistogramCommand> cmd_blocks( - kNumCommandPrefixes, 1024, 500.0, n_commands, - &mb->command_split, &mb->command_histograms); - BlockSplitter<HistogramDistance> dist_blocks( - 64, 512, 100.0, n_commands, - &mb->distance_split, &mb->distance_histograms); - - for (size_t i = 0; i < n_commands; ++i) { - const Command cmd = commands[i]; - cmd_blocks.AddSymbol(cmd.cmd_prefix_); - for (size_t j = cmd.insert_len_; j != 0; --j) { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); - uint8_t literal = ringbuffer[pos & mask]; - lit_blocks.AddSymbol(literal, static_context_map[context]); - prev_byte2 = prev_byte; - prev_byte = literal; - ++pos; - } - pos += cmd.copy_len_; - if (cmd.copy_len_ > 0) { - prev_byte2 = ringbuffer[(pos - 2) & mask]; - prev_byte = ringbuffer[(pos - 1) & mask]; - if (cmd.cmd_prefix_ >= 128) { - dist_blocks.AddSymbol(cmd.dist_prefix_); - } - } - } - - lit_blocks.FinishBlock(/* is_final = */ true); - cmd_blocks.FinishBlock(/* is_final = */ true); - dist_blocks.FinishBlock(/* is_final = */ true); - - mb->literal_context_map.resize( - mb->literal_split.num_types << kLiteralContextBits); - for (size_t i = 0; i < mb->literal_split.num_types; ++i) { - for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) { - mb->literal_context_map[(i << kLiteralContextBits) + j] = - static_cast<uint32_t>(i * num_contexts) + static_context_map[j]; - } - } -} - -void OptimizeHistograms(size_t num_direct_distance_codes, - size_t distance_postfix_bits, - MetaBlockSplit* mb) { - for (size_t i = 0; i < mb->literal_histograms.size(); ++i) { - OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]); - } - for (size_t i = 0; i < mb->command_histograms.size(); ++i) { - OptimizeHuffmanCountsForRle(kNumCommandPrefixes, - &mb->command_histograms[i].data_[0]); - } - size_t num_distance_codes = - kNumDistanceShortCodes + num_direct_distance_codes + - (48u << distance_postfix_bits); - for (size_t i = 0; i < mb->distance_histograms.size(); ++i) { - OptimizeHuffmanCountsForRle(num_distance_codes, - &mb->distance_histograms[i].data_[0]); - } -} - -} // namespace brotli |