summaryrefslogtreecommitdiffstats
path: root/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--web/server/h2o/libh2o/deps/brotli/enc/metablock.cc534
1 files changed, 534 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc b/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc
new file mode 100644
index 00000000..d6ea8426
--- /dev/null
+++ b/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc
@@ -0,0 +1,534 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+// Algorithms for distributing the literals and commands of a metablock between
+// block types and contexts.
+
+#include "./metablock.h"
+
+#include "./block_splitter.h"
+#include "./context.h"
+#include "./cluster.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+void BuildMetaBlock(const uint8_t* ringbuffer,
+ const size_t pos,
+ const size_t mask,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ const Command* cmds,
+ size_t num_commands,
+ ContextType literal_context_mode,
+ MetaBlockSplit* mb) {
+ SplitBlock(cmds, num_commands,
+ ringbuffer, pos, mask,
+ &mb->literal_split,
+ &mb->command_split,
+ &mb->distance_split);
+
+ std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
+ literal_context_mode);
+
+ size_t num_literal_contexts =
+ mb->literal_split.num_types << kLiteralContextBits;
+ size_t num_distance_contexts =
+ mb->distance_split.num_types << kDistanceContextBits;
+ std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
+ mb->command_histograms.resize(mb->command_split.num_types);
+ std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
+ BuildHistograms(cmds, num_commands,
+ mb->literal_split,
+ mb->command_split,
+ mb->distance_split,
+ ringbuffer,
+ pos,
+ mask,
+ prev_byte,
+ prev_byte2,
+ literal_context_modes,
+ &literal_histograms,
+ &mb->command_histograms,
+ &distance_histograms);
+
+ // Histogram ids need to fit in one byte.
+ static const size_t kMaxNumberOfHistograms = 256;
+
+ ClusterHistograms(literal_histograms,
+ 1u << kLiteralContextBits,
+ mb->literal_split.num_types,
+ kMaxNumberOfHistograms,
+ &mb->literal_histograms,
+ &mb->literal_context_map);
+
+ ClusterHistograms(distance_histograms,
+ 1u << kDistanceContextBits,
+ mb->distance_split.num_types,
+ kMaxNumberOfHistograms,
+ &mb->distance_histograms,
+ &mb->distance_context_map);
+}
+
+// Greedy block splitter for one block category (literal, command or distance).
+template<typename HistogramType>
+class BlockSplitter {
+ public:
+ BlockSplitter(size_t alphabet_size,
+ size_t min_block_size,
+ double split_threshold,
+ size_t num_symbols,
+ BlockSplit* split,
+ std::vector<HistogramType>* histograms)
+ : alphabet_size_(alphabet_size),
+ min_block_size_(min_block_size),
+ split_threshold_(split_threshold),
+ num_blocks_(0),
+ split_(split),
+ histograms_(histograms),
+ target_block_size_(min_block_size),
+ block_size_(0),
+ curr_histogram_ix_(0),
+ merge_last_count_(0) {
+ size_t max_num_blocks = num_symbols / min_block_size + 1;
+ // We have to allocate one more histogram than the maximum number of block
+ // types for the current histogram when the meta-block is too big.
+ size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
+ split_->lengths.resize(max_num_blocks);
+ split_->types.resize(max_num_blocks);
+ histograms_->resize(max_num_types);
+ last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
+ }
+
+ // Adds the next symbol to the current histogram. When the current histogram
+ // reaches the target size, decides on merging the block.
+ void AddSymbol(size_t symbol) {
+ (*histograms_)[curr_histogram_ix_].Add(symbol);
+ ++block_size_;
+ if (block_size_ == target_block_size_) {
+ FinishBlock(/* is_final = */ false);
+ }
+ }
+
+ // Does either of three things:
+ // (1) emits the current block with a new block type;
+ // (2) emits the current block with the type of the second last block;
+ // (3) merges the current block with the last block.
+ void FinishBlock(bool is_final) {
+ if (block_size_ < min_block_size_) {
+ block_size_ = min_block_size_;
+ }
+ if (num_blocks_ == 0) {
+ // Create first block.
+ split_->lengths[0] = static_cast<uint32_t>(block_size_);
+ split_->types[0] = 0;
+ last_entropy_[0] =
+ BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
+ last_entropy_[1] = last_entropy_[0];
+ ++num_blocks_;
+ ++split_->num_types;
+ ++curr_histogram_ix_;
+ block_size_ = 0;
+ } else if (block_size_ > 0) {
+ double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
+ alphabet_size_);
+ HistogramType combined_histo[2];
+ double combined_entropy[2];
+ double diff[2];
+ for (size_t j = 0; j < 2; ++j) {
+ size_t last_histogram_ix = last_histogram_ix_[j];
+ combined_histo[j] = (*histograms_)[curr_histogram_ix_];
+ combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
+ combined_entropy[j] = BitsEntropy(
+ &combined_histo[j].data_[0], alphabet_size_);
+ diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
+ }
+
+ if (split_->num_types < kMaxBlockTypes &&
+ diff[0] > split_threshold_ &&
+ diff[1] > split_threshold_) {
+ // Create new block.
+ split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+ split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
+ last_histogram_ix_[1] = last_histogram_ix_[0];
+ last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
+ last_entropy_[1] = last_entropy_[0];
+ last_entropy_[0] = entropy;
+ ++num_blocks_;
+ ++split_->num_types;
+ ++curr_histogram_ix_;
+ block_size_ = 0;
+ merge_last_count_ = 0;
+ target_block_size_ = min_block_size_;
+ } else if (diff[1] < diff[0] - 20.0) {
+ // Combine this block with second last block.
+ split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+ split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
+ std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
+ (*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
+ last_entropy_[1] = last_entropy_[0];
+ last_entropy_[0] = combined_entropy[1];
+ ++num_blocks_;
+ block_size_ = 0;
+ (*histograms_)[curr_histogram_ix_].Clear();
+ merge_last_count_ = 0;
+ target_block_size_ = min_block_size_;
+ } else {
+ // Combine this block with last block.
+ split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
+ (*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
+ last_entropy_[0] = combined_entropy[0];
+ if (split_->num_types == 1) {
+ last_entropy_[1] = last_entropy_[0];
+ }
+ block_size_ = 0;
+ (*histograms_)[curr_histogram_ix_].Clear();
+ if (++merge_last_count_ > 1) {
+ target_block_size_ += min_block_size_;
+ }
+ }
+ }
+ if (is_final) {
+ (*histograms_).resize(split_->num_types);
+ split_->types.resize(num_blocks_);
+ split_->lengths.resize(num_blocks_);
+ }
+ }
+
+ private:
+ static const uint16_t kMaxBlockTypes = 256;
+
+ // Alphabet size of particular block category.
+ const size_t alphabet_size_;
+ // We collect at least this many symbols for each block.
+ const size_t min_block_size_;
+ // We merge histograms A and B if
+ // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+ // where A is the current histogram and B is the histogram of the last or the
+ // second last block type.
+ const double split_threshold_;
+
+ size_t num_blocks_;
+ BlockSplit* split_; // not owned
+ std::vector<HistogramType>* histograms_; // not owned
+
+ // The number of symbols that we want to collect before deciding on whether
+ // or not to merge the block with a previous one or emit a new block.
+ size_t target_block_size_;
+ // The number of symbols in the current histogram.
+ size_t block_size_;
+ // Offset of the current histogram.
+ size_t curr_histogram_ix_;
+ // Offset of the histograms of the previous two block types.
+ size_t last_histogram_ix_[2];
+ // Entropy of the previous two block types.
+ double last_entropy_[2];
+ // The number of times we merged the current block with the last one.
+ size_t merge_last_count_;
+};
+
+void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ const Command *commands,
+ size_t n_commands,
+ MetaBlockSplit* mb) {
+ size_t num_literals = 0;
+ for (size_t i = 0; i < n_commands; ++i) {
+ num_literals += commands[i].insert_len_;
+ }
+
+ BlockSplitter<HistogramLiteral> lit_blocks(
+ 256, 512, 400.0, num_literals,
+ &mb->literal_split, &mb->literal_histograms);
+ BlockSplitter<HistogramCommand> cmd_blocks(
+ kNumCommandPrefixes, 1024, 500.0, n_commands,
+ &mb->command_split, &mb->command_histograms);
+ BlockSplitter<HistogramDistance> dist_blocks(
+ 64, 512, 100.0, n_commands,
+ &mb->distance_split, &mb->distance_histograms);
+
+ for (size_t i = 0; i < n_commands; ++i) {
+ const Command cmd = commands[i];
+ cmd_blocks.AddSymbol(cmd.cmd_prefix_);
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
+ lit_blocks.AddSymbol(ringbuffer[pos & mask]);
+ ++pos;
+ }
+ pos += cmd.copy_len_;
+ if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
+ dist_blocks.AddSymbol(cmd.dist_prefix_);
+ }
+ }
+
+ lit_blocks.FinishBlock(/* is_final = */ true);
+ cmd_blocks.FinishBlock(/* is_final = */ true);
+ dist_blocks.FinishBlock(/* is_final = */ true);
+}
+
+// Greedy block splitter for one block category (literal, command or distance).
+// Gathers histograms for all context buckets.
+template<typename HistogramType>
+class ContextBlockSplitter {
+ public:
+ ContextBlockSplitter(size_t alphabet_size,
+ size_t num_contexts,
+ size_t min_block_size,
+ double split_threshold,
+ size_t num_symbols,
+ BlockSplit* split,
+ std::vector<HistogramType>* histograms)
+ : alphabet_size_(alphabet_size),
+ num_contexts_(num_contexts),
+ max_block_types_(kMaxBlockTypes / num_contexts),
+ min_block_size_(min_block_size),
+ split_threshold_(split_threshold),
+ num_blocks_(0),
+ split_(split),
+ histograms_(histograms),
+ target_block_size_(min_block_size),
+ block_size_(0),
+ curr_histogram_ix_(0),
+ last_entropy_(2 * num_contexts),
+ merge_last_count_(0) {
+ size_t max_num_blocks = num_symbols / min_block_size + 1;
+ // We have to allocate one more histogram than the maximum number of block
+ // types for the current histogram when the meta-block is too big.
+ size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
+ split_->lengths.resize(max_num_blocks);
+ split_->types.resize(max_num_blocks);
+ histograms_->resize(max_num_types * num_contexts);
+ last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
+ }
+
+ // Adds the next symbol to the current block type and context. When the
+ // current block reaches the target size, decides on merging the block.
+ void AddSymbol(size_t symbol, size_t context) {
+ (*histograms_)[curr_histogram_ix_ + context].Add(symbol);
+ ++block_size_;
+ if (block_size_ == target_block_size_) {
+ FinishBlock(/* is_final = */ false);
+ }
+ }
+
+ // Does either of three things:
+ // (1) emits the current block with a new block type;
+ // (2) emits the current block with the type of the second last block;
+ // (3) merges the current block with the last block.
+ void FinishBlock(bool is_final) {
+ if (block_size_ < min_block_size_) {
+ block_size_ = min_block_size_;
+ }
+ if (num_blocks_ == 0) {
+ // Create first block.
+ split_->lengths[0] = static_cast<uint32_t>(block_size_);
+ split_->types[0] = 0;
+ for (size_t i = 0; i < num_contexts_; ++i) {
+ last_entropy_[i] =
+ BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
+ last_entropy_[num_contexts_ + i] = last_entropy_[i];
+ }
+ ++num_blocks_;
+ ++split_->num_types;
+ curr_histogram_ix_ += num_contexts_;
+ block_size_ = 0;
+ } else if (block_size_ > 0) {
+ // Try merging the set of histograms for the current block type with the
+ // respective set of histograms for the last and second last block types.
+ // Decide over the split based on the total reduction of entropy across
+ // all contexts.
+ std::vector<double> entropy(num_contexts_);
+ std::vector<HistogramType> combined_histo(2 * num_contexts_);
+ std::vector<double> combined_entropy(2 * num_contexts_);
+ double diff[2] = { 0.0 };
+ for (size_t i = 0; i < num_contexts_; ++i) {
+ size_t curr_histo_ix = curr_histogram_ix_ + i;
+ entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
+ alphabet_size_);
+ for (size_t j = 0; j < 2; ++j) {
+ size_t jx = j * num_contexts_ + i;
+ size_t last_histogram_ix = last_histogram_ix_[j] + i;
+ combined_histo[jx] = (*histograms_)[curr_histo_ix];
+ combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
+ combined_entropy[jx] = BitsEntropy(
+ &combined_histo[jx].data_[0], alphabet_size_);
+ diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
+ }
+ }
+
+ if (split_->num_types < max_block_types_ &&
+ diff[0] > split_threshold_ &&
+ diff[1] > split_threshold_) {
+ // Create new block.
+ split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+ split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
+ last_histogram_ix_[1] = last_histogram_ix_[0];
+ last_histogram_ix_[0] = split_->num_types * num_contexts_;
+ for (size_t i = 0; i < num_contexts_; ++i) {
+ last_entropy_[num_contexts_ + i] = last_entropy_[i];
+ last_entropy_[i] = entropy[i];
+ }
+ ++num_blocks_;
+ ++split_->num_types;
+ curr_histogram_ix_ += num_contexts_;
+ block_size_ = 0;
+ merge_last_count_ = 0;
+ target_block_size_ = min_block_size_;
+ } else if (diff[1] < diff[0] - 20.0) {
+ // Combine this block with second last block.
+ split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+ split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
+ std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
+ for (size_t i = 0; i < num_contexts_; ++i) {
+ (*histograms_)[last_histogram_ix_[0] + i] =
+ combined_histo[num_contexts_ + i];
+ last_entropy_[num_contexts_ + i] = last_entropy_[i];
+ last_entropy_[i] = combined_entropy[num_contexts_ + i];
+ (*histograms_)[curr_histogram_ix_ + i].Clear();
+ }
+ ++num_blocks_;
+ block_size_ = 0;
+ merge_last_count_ = 0;
+ target_block_size_ = min_block_size_;
+ } else {
+ // Combine this block with last block.
+ split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
+ for (size_t i = 0; i < num_contexts_; ++i) {
+ (*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
+ last_entropy_[i] = combined_entropy[i];
+ if (split_->num_types == 1) {
+ last_entropy_[num_contexts_ + i] = last_entropy_[i];
+ }
+ (*histograms_)[curr_histogram_ix_ + i].Clear();
+ }
+ block_size_ = 0;
+ if (++merge_last_count_ > 1) {
+ target_block_size_ += min_block_size_;
+ }
+ }
+ }
+ if (is_final) {
+ (*histograms_).resize(split_->num_types * num_contexts_);
+ split_->types.resize(num_blocks_);
+ split_->lengths.resize(num_blocks_);
+ }
+ }
+
+ private:
+ static const int kMaxBlockTypes = 256;
+
+ // Alphabet size of particular block category.
+ const size_t alphabet_size_;
+ const size_t num_contexts_;
+ const size_t max_block_types_;
+ // We collect at least this many symbols for each block.
+ const size_t min_block_size_;
+ // We merge histograms A and B if
+ // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+ // where A is the current histogram and B is the histogram of the last or the
+ // second last block type.
+ const double split_threshold_;
+
+ size_t num_blocks_;
+ BlockSplit* split_; // not owned
+ std::vector<HistogramType>* histograms_; // not owned
+
+ // The number of symbols that we want to collect before deciding on whether
+ // or not to merge the block with a previous one or emit a new block.
+ size_t target_block_size_;
+ // The number of symbols in the current histogram.
+ size_t block_size_;
+ // Offset of the current histogram.
+ size_t curr_histogram_ix_;
+ // Offset of the histograms of the previous two block types.
+ size_t last_histogram_ix_[2];
+ // Entropy of the previous two block types.
+ std::vector<double> last_entropy_;
+ // The number of times we merged the current block with the last one.
+ size_t merge_last_count_;
+};
+
+void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
+ size_t pos,
+ size_t mask,
+ uint8_t prev_byte,
+ uint8_t prev_byte2,
+ ContextType literal_context_mode,
+ size_t num_contexts,
+ const uint32_t* static_context_map,
+ const Command *commands,
+ size_t n_commands,
+ MetaBlockSplit* mb) {
+ size_t num_literals = 0;
+ for (size_t i = 0; i < n_commands; ++i) {
+ num_literals += commands[i].insert_len_;
+ }
+
+ ContextBlockSplitter<HistogramLiteral> lit_blocks(
+ 256, num_contexts, 512, 400.0, num_literals,
+ &mb->literal_split, &mb->literal_histograms);
+ BlockSplitter<HistogramCommand> cmd_blocks(
+ kNumCommandPrefixes, 1024, 500.0, n_commands,
+ &mb->command_split, &mb->command_histograms);
+ BlockSplitter<HistogramDistance> dist_blocks(
+ 64, 512, 100.0, n_commands,
+ &mb->distance_split, &mb->distance_histograms);
+
+ for (size_t i = 0; i < n_commands; ++i) {
+ const Command cmd = commands[i];
+ cmd_blocks.AddSymbol(cmd.cmd_prefix_);
+ for (size_t j = cmd.insert_len_; j != 0; --j) {
+ size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
+ uint8_t literal = ringbuffer[pos & mask];
+ lit_blocks.AddSymbol(literal, static_context_map[context]);
+ prev_byte2 = prev_byte;
+ prev_byte = literal;
+ ++pos;
+ }
+ pos += cmd.copy_len_;
+ if (cmd.copy_len_ > 0) {
+ prev_byte2 = ringbuffer[(pos - 2) & mask];
+ prev_byte = ringbuffer[(pos - 1) & mask];
+ if (cmd.cmd_prefix_ >= 128) {
+ dist_blocks.AddSymbol(cmd.dist_prefix_);
+ }
+ }
+ }
+
+ lit_blocks.FinishBlock(/* is_final = */ true);
+ cmd_blocks.FinishBlock(/* is_final = */ true);
+ dist_blocks.FinishBlock(/* is_final = */ true);
+
+ mb->literal_context_map.resize(
+ mb->literal_split.num_types << kLiteralContextBits);
+ for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
+ for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
+ mb->literal_context_map[(i << kLiteralContextBits) + j] =
+ static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
+ }
+ }
+}
+
+void OptimizeHistograms(size_t num_direct_distance_codes,
+ size_t distance_postfix_bits,
+ MetaBlockSplit* mb) {
+ for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
+ OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
+ }
+ for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
+ OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
+ &mb->command_histograms[i].data_[0]);
+ }
+ size_t num_distance_codes =
+ kNumDistanceShortCodes + num_direct_distance_codes +
+ (48u << distance_postfix_bits);
+ for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
+ OptimizeHuffmanCountsForRle(num_distance_codes,
+ &mb->distance_histograms[i].data_[0]);
+ }
+}
+
+} // namespace brotli