1 files changed, 534 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc b/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc
new file mode 100644
index 00000000..d6ea8426
--- /dev/null
+++ b/web/server/h2o/libh2o/deps/brotli/enc/metablock.cc
@@ -0,0 +1,534 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+// Algorithms for distributing the literals and commands of a metablock between
+// block types and contexts.
+
+#include "./metablock.h"
+
+#include "./block_splitter.h"
+#include "./context.h"
+#include "./cluster.h"
+#include "./histogram.h"
+
+namespace brotli {
+
+void BuildMetaBlock(const uint8_t* ringbuffer,
+                    const size_t pos,
+                    const size_t mask,
+                    uint8_t prev_byte,
+                    uint8_t prev_byte2,
+                    const Command* cmds,
+                    size_t num_commands,
+                    ContextType literal_context_mode,
+                    MetaBlockSplit* mb) {
+  SplitBlock(cmds, num_commands,
+             ringbuffer, pos, mask,
+             &mb->literal_split,
+             &mb->command_split,
+             &mb->distance_split);
+
+  std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
+                                                 literal_context_mode);
+
+  size_t num_literal_contexts =
+      mb->literal_split.num_types << kLiteralContextBits;
+  size_t num_distance_contexts =
+      mb->distance_split.num_types << kDistanceContextBits;
+  std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
+  mb->command_histograms.resize(mb->command_split.num_types);
+  std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
+  BuildHistograms(cmds, num_commands,
+                  mb->literal_split,
+                  mb->command_split,
+                  mb->distance_split,
+                  ringbuffer,
+                  pos,
+                  mask,
+                  prev_byte,
+                  prev_byte2,
+                  literal_context_modes,
+                  &literal_histograms,
+                  &mb->command_histograms,
+                  &distance_histograms);
+
+  // Histogram ids need to fit in one byte.
+  static const size_t kMaxNumberOfHistograms = 256;
+
+  ClusterHistograms(literal_histograms,
+                    1u << kLiteralContextBits,
+                    mb->literal_split.num_types,
+                    kMaxNumberOfHistograms,
+                    &mb->literal_histograms,
+                    &mb->literal_context_map);
+
+  ClusterHistograms(distance_histograms,
+                    1u << kDistanceContextBits,
+                    mb->distance_split.num_types,
+                    kMaxNumberOfHistograms,
+                    &mb->distance_histograms,
+                    &mb->distance_context_map);
+}
+
+// Greedy block splitter for one block category (literal, command or distance).
+template<typename HistogramType>
+class BlockSplitter {
+ public:
+  BlockSplitter(size_t alphabet_size,
+                size_t min_block_size,
+                double split_threshold,
+                size_t num_symbols,
+                BlockSplit* split,
+                std::vector<HistogramType>* histograms)
+      : alphabet_size_(alphabet_size),
+        min_block_size_(min_block_size),
+        split_threshold_(split_threshold),
+        num_blocks_(0),
+        split_(split),
+        histograms_(histograms),
+        target_block_size_(min_block_size),
+        block_size_(0),
+        curr_histogram_ix_(0),
+        merge_last_count_(0) {
+    size_t max_num_blocks = num_symbols / min_block_size + 1;
+    // We have to allocate one more histogram than the maximum number of block
+    // types for the current histogram when the meta-block is too big.
+    size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
+    split_->lengths.resize(max_num_blocks);
+    split_->types.resize(max_num_blocks);
+    histograms_->resize(max_num_types);
+    last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
+  }
+
+  // Adds the next symbol to the current histogram. When the current histogram
+  // reaches the target size, decides on merging the block.
+  void AddSymbol(size_t symbol) {
+    (*histograms_)[curr_histogram_ix_].Add(symbol);
+    ++block_size_;
+    if (block_size_ == target_block_size_) {
+      FinishBlock(/* is_final = */ false);
+    }
+  }
+
+  // Does either of three things:
+  //   (1) emits the current block with a new block type;
+  //   (2) emits the current block with the type of the second last block;
+  //   (3) merges the current block with the last block.
+  void FinishBlock(bool is_final) {
+    if (block_size_ < min_block_size_) {
+      block_size_ = min_block_size_;
+    }
+    if (num_blocks_ == 0) {
+      // Create first block.
+      split_->lengths[0] = static_cast<uint32_t>(block_size_);
+      split_->types[0] = 0;
+      last_entropy_[0] =
+          BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
+      last_entropy_[1] = last_entropy_[0];
+      ++num_blocks_;
+      ++split_->num_types;
+      ++curr_histogram_ix_;
+      block_size_ = 0;
+    } else if (block_size_ > 0) {
+      double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
+                                   alphabet_size_);
+      HistogramType combined_histo[2];
+      double combined_entropy[2];
+      double diff[2];
+      for (size_t j = 0; j < 2; ++j) {
+        size_t last_histogram_ix = last_histogram_ix_[j];
+        combined_histo[j] = (*histograms_)[curr_histogram_ix_];
+        combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
+        combined_entropy[j] = BitsEntropy(
+            &combined_histo[j].data_[0], alphabet_size_);
+        diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
+      }
+
+      if (split_->num_types < kMaxBlockTypes &&
+          diff[0] > split_threshold_ &&
+          diff[1] > split_threshold_) {
+        // Create new block.
+        split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+        split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
+        last_histogram_ix_[1] = last_histogram_ix_[0];
+        last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
+        last_entropy_[1] = last_entropy_[0];
+        last_entropy_[0] = entropy;
+        ++num_blocks_;
+        ++split_->num_types;
+        ++curr_histogram_ix_;
+        block_size_ = 0;
+        merge_last_count_ = 0;
+        target_block_size_ = min_block_size_;
+      } else if (diff[1] < diff[0] - 20.0) {
+        // Combine this block with second last block.
+        split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+        split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
+        std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
+        (*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
+        last_entropy_[1] = last_entropy_[0];
+        last_entropy_[0] = combined_entropy[1];
+        ++num_blocks_;
+        block_size_ = 0;
+        (*histograms_)[curr_histogram_ix_].Clear();
+        merge_last_count_ = 0;
+        target_block_size_ = min_block_size_;
+      } else {
+        // Combine this block with last block.
+        split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
+        (*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
+        last_entropy_[0] = combined_entropy[0];
+        if (split_->num_types == 1) {
+          last_entropy_[1] = last_entropy_[0];
+        }
+        block_size_ = 0;
+        (*histograms_)[curr_histogram_ix_].Clear();
+        if (++merge_last_count_ > 1) {
+          target_block_size_ += min_block_size_;
+        }
+      }
+    }
+    if (is_final) {
+      (*histograms_).resize(split_->num_types);
+      split_->types.resize(num_blocks_);
+      split_->lengths.resize(num_blocks_);
+    }
+  }
+
+ private:
+  static const uint16_t kMaxBlockTypes = 256;
+
+  // Alphabet size of particular block category.
+  const size_t alphabet_size_;
+  // We collect at least this many symbols for each block.
+  const size_t min_block_size_;
+  // We merge histograms A and B if
+  //   entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+  // where A is the current histogram and B is the histogram of the last or the
+  // second last block type.
+  const double split_threshold_;
+
+  size_t num_blocks_;
+  BlockSplit* split_;  // not owned
+  std::vector<HistogramType>* histograms_;  // not owned
+
+  // The number of symbols that we want to collect before deciding on whether
+  // or not to merge the block with a previous one or emit a new block.
+  size_t target_block_size_;
+  // The number of symbols in the current histogram.
+  size_t block_size_;
+  // Offset of the current histogram.
+  size_t curr_histogram_ix_;
+  // Offset of the histograms of the previous two block types.
+  size_t last_histogram_ix_[2];
+  // Entropy of the previous two block types.
+  double last_entropy_[2];
+  // The number of times we merged the current block with the last one.
+  size_t merge_last_count_;
+};
+
+void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
+                          size_t pos,
+                          size_t mask,
+                          const Command *commands,
+                          size_t n_commands,
+                          MetaBlockSplit* mb) {
+  size_t num_literals = 0;
+  for (size_t i = 0; i < n_commands; ++i) {
+    num_literals += commands[i].insert_len_;
+  }
+
+  BlockSplitter<HistogramLiteral> lit_blocks(
+      256, 512, 400.0, num_literals,
+      &mb->literal_split, &mb->literal_histograms);
+  BlockSplitter<HistogramCommand> cmd_blocks(
+      kNumCommandPrefixes, 1024, 500.0, n_commands,
+      &mb->command_split, &mb->command_histograms);
+  BlockSplitter<HistogramDistance> dist_blocks(
+      64, 512, 100.0, n_commands,
+      &mb->distance_split, &mb->distance_histograms);
+
+  for (size_t i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    cmd_blocks.AddSymbol(cmd.cmd_prefix_);
+    for (size_t j = cmd.insert_len_; j != 0; --j) {
+      lit_blocks.AddSymbol(ringbuffer[pos & mask]);
+      ++pos;
+    }
+    pos += cmd.copy_len_;
+    if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
+      dist_blocks.AddSymbol(cmd.dist_prefix_);
+    }
+  }
+
+  lit_blocks.FinishBlock(/* is_final = */ true);
+  cmd_blocks.FinishBlock(/* is_final = */ true);
+  dist_blocks.FinishBlock(/* is_final = */ true);
+}
+
+// Greedy block splitter for one block category (literal, command or distance).
+// Gathers histograms for all context buckets.
+template<typename HistogramType>
+class ContextBlockSplitter {
+ public:
+  ContextBlockSplitter(size_t alphabet_size,
+                       size_t num_contexts,
+                       size_t min_block_size,
+                       double split_threshold,
+                       size_t num_symbols,
+                       BlockSplit* split,
+                       std::vector<HistogramType>* histograms)
+      : alphabet_size_(alphabet_size),
+        num_contexts_(num_contexts),
+        max_block_types_(kMaxBlockTypes / num_contexts),
+        min_block_size_(min_block_size),
+        split_threshold_(split_threshold),
+        num_blocks_(0),
+        split_(split),
+        histograms_(histograms),
+        target_block_size_(min_block_size),
+        block_size_(0),
+        curr_histogram_ix_(0),
+        last_entropy_(2 * num_contexts),
+        merge_last_count_(0) {
+    size_t max_num_blocks = num_symbols / min_block_size + 1;
+    // We have to allocate one more histogram than the maximum number of block
+    // types for the current histogram when the meta-block is too big.
+    size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
+    split_->lengths.resize(max_num_blocks);
+    split_->types.resize(max_num_blocks);
+    histograms_->resize(max_num_types * num_contexts);
+    last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
+  }
+
+  // Adds the next symbol to the current block type and context. When the
+  // current block reaches the target size, decides on merging the block.
+  void AddSymbol(size_t symbol, size_t context) {
+    (*histograms_)[curr_histogram_ix_ + context].Add(symbol);
+    ++block_size_;
+    if (block_size_ == target_block_size_) {
+      FinishBlock(/* is_final = */ false);
+    }
+  }
+
+  // Does either of three things:
+  //   (1) emits the current block with a new block type;
+  //   (2) emits the current block with the type of the second last block;
+  //   (3) merges the current block with the last block.
+  void FinishBlock(bool is_final) {
+    if (block_size_ < min_block_size_) {
+      block_size_ = min_block_size_;
+    }
+    if (num_blocks_ == 0) {
+      // Create first block.
+      split_->lengths[0] = static_cast<uint32_t>(block_size_);
+      split_->types[0] = 0;
+      for (size_t i = 0; i < num_contexts_; ++i) {
+        last_entropy_[i] =
+            BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
+        last_entropy_[num_contexts_ + i] = last_entropy_[i];
+      }
+      ++num_blocks_;
+      ++split_->num_types;
+      curr_histogram_ix_ += num_contexts_;
+      block_size_ = 0;
+    } else if (block_size_ > 0) {
+      // Try merging the set of histograms for the current block type with the
+      // respective set of histograms for the last and second last block types.
+      // Decide over the split based on the total reduction of entropy across
+      // all contexts.
+      std::vector<double> entropy(num_contexts_);
+      std::vector<HistogramType> combined_histo(2 * num_contexts_);
+      std::vector<double> combined_entropy(2 * num_contexts_);
+      double diff[2] = { 0.0 };
+      for (size_t i = 0; i < num_contexts_; ++i) {
+        size_t curr_histo_ix = curr_histogram_ix_ + i;
+        entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
+                                 alphabet_size_);
+        for (size_t j = 0; j < 2; ++j) {
+          size_t jx = j * num_contexts_ + i;
+          size_t last_histogram_ix = last_histogram_ix_[j] + i;
+          combined_histo[jx] = (*histograms_)[curr_histo_ix];
+          combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
+          combined_entropy[jx] = BitsEntropy(
+              &combined_histo[jx].data_[0], alphabet_size_);
+          diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
+        }
+      }
+
+      if (split_->num_types < max_block_types_ &&
+          diff[0] > split_threshold_ &&
+          diff[1] > split_threshold_) {
+        // Create new block.
+        split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+        split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
+        last_histogram_ix_[1] = last_histogram_ix_[0];
+        last_histogram_ix_[0] = split_->num_types * num_contexts_;
+        for (size_t i = 0; i < num_contexts_; ++i) {
+          last_entropy_[num_contexts_ + i] = last_entropy_[i];
+          last_entropy_[i] = entropy[i];
+        }
+        ++num_blocks_;
+        ++split_->num_types;
+        curr_histogram_ix_ += num_contexts_;
+        block_size_ = 0;
+        merge_last_count_ = 0;
+        target_block_size_ = min_block_size_;
+      } else if (diff[1] < diff[0] - 20.0) {
+        // Combine this block with second last block.
+        split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
+        split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
+        std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
+        for (size_t i = 0; i < num_contexts_; ++i) {
+          (*histograms_)[last_histogram_ix_[0] + i] =
+              combined_histo[num_contexts_ + i];
+          last_entropy_[num_contexts_ + i] = last_entropy_[i];
+          last_entropy_[i] = combined_entropy[num_contexts_ + i];
+          (*histograms_)[curr_histogram_ix_ + i].Clear();
+        }
+        ++num_blocks_;
+        block_size_ = 0;
+        merge_last_count_ = 0;
+        target_block_size_ = min_block_size_;
+      } else {
+        // Combine this block with last block.
+        split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
+        for (size_t i = 0; i < num_contexts_; ++i) {
+          (*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
+          last_entropy_[i] = combined_entropy[i];
+          if (split_->num_types == 1) {
+            last_entropy_[num_contexts_ + i] = last_entropy_[i];
+          }
+          (*histograms_)[curr_histogram_ix_ + i].Clear();
+        }
+        block_size_ = 0;
+        if (++merge_last_count_ > 1) {
+          target_block_size_ += min_block_size_;
+        }
+      }
+    }
+    if (is_final) {
+      (*histograms_).resize(split_->num_types * num_contexts_);
+      split_->types.resize(num_blocks_);
+      split_->lengths.resize(num_blocks_);
+    }
+  }
+
+ private:
+  static const int kMaxBlockTypes = 256;
+
+  // Alphabet size of particular block category.
+  const size_t alphabet_size_;
+  const size_t num_contexts_;
+  const size_t max_block_types_;
+  // We collect at least this many symbols for each block.
+  const size_t min_block_size_;
+  // We merge histograms A and B if
+  //   entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
+  // where A is the current histogram and B is the histogram of the last or the
+  // second last block type.
+  const double split_threshold_;
+
+  size_t num_blocks_;
+  BlockSplit* split_;  // not owned
+  std::vector<HistogramType>* histograms_;  // not owned
+
+  // The number of symbols that we want to collect before deciding on whether
+  // or not to merge the block with a previous one or emit a new block.
+  size_t target_block_size_;
+  // The number of symbols in the current histogram.
+  size_t block_size_;
+  // Offset of the current histogram.
+  size_t curr_histogram_ix_;
+  // Offset of the histograms of the previous two block types.
+  size_t last_histogram_ix_[2];
+  // Entropy of the previous two block types.
+  std::vector<double> last_entropy_;
+  // The number of times we merged the current block with the last one.
+  size_t merge_last_count_;
+};
+
+void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
+                                      size_t pos,
+                                      size_t mask,
+                                      uint8_t prev_byte,
+                                      uint8_t prev_byte2,
+                                      ContextType literal_context_mode,
+                                      size_t num_contexts,
+                                      const uint32_t* static_context_map,
+                                      const Command *commands,
+                                      size_t n_commands,
+                                      MetaBlockSplit* mb) {
+  size_t num_literals = 0;
+  for (size_t i = 0; i < n_commands; ++i) {
+    num_literals += commands[i].insert_len_;
+  }
+
+  ContextBlockSplitter<HistogramLiteral> lit_blocks(
+      256, num_contexts, 512, 400.0, num_literals,
+      &mb->literal_split, &mb->literal_histograms);
+  BlockSplitter<HistogramCommand> cmd_blocks(
+      kNumCommandPrefixes, 1024, 500.0, n_commands,
+      &mb->command_split, &mb->command_histograms);
+  BlockSplitter<HistogramDistance> dist_blocks(
+      64, 512, 100.0, n_commands,
+      &mb->distance_split, &mb->distance_histograms);
+
+  for (size_t i = 0; i < n_commands; ++i) {
+    const Command cmd = commands[i];
+    cmd_blocks.AddSymbol(cmd.cmd_prefix_);
+    for (size_t j = cmd.insert_len_; j != 0; --j) {
+      size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
+      uint8_t literal = ringbuffer[pos & mask];
+      lit_blocks.AddSymbol(literal, static_context_map[context]);
+      prev_byte2 = prev_byte;
+      prev_byte = literal;
+      ++pos;
+    }
+    pos += cmd.copy_len_;
+    if (cmd.copy_len_ > 0) {
+      prev_byte2 = ringbuffer[(pos - 2) & mask];
+      prev_byte = ringbuffer[(pos - 1) & mask];
+      if (cmd.cmd_prefix_ >= 128) {
+        dist_blocks.AddSymbol(cmd.dist_prefix_);
+      }
+    }
+  }
+
+  lit_blocks.FinishBlock(/* is_final = */ true);
+  cmd_blocks.FinishBlock(/* is_final = */ true);
+  dist_blocks.FinishBlock(/* is_final = */ true);
+
+  mb->literal_context_map.resize(
+      mb->literal_split.num_types << kLiteralContextBits);
+  for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
+    for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
+      mb->literal_context_map[(i << kLiteralContextBits) + j] =
+          static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
+    }
+  }
+}
+
+void OptimizeHistograms(size_t num_direct_distance_codes,
+                        size_t distance_postfix_bits,
+                        MetaBlockSplit* mb) {
+  for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
+    OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
+  }
+  for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
+    OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
+                                &mb->command_histograms[i].data_[0]);
+  }
+  size_t num_distance_codes =
+      kNumDistanceShortCodes + num_direct_distance_codes +
+      (48u << distance_postfix_bits);
+  for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
+    OptimizeHuffmanCountsForRle(num_distance_codes,
+                                &mb->distance_histograms[i].data_[0]);
+  }
+}
+
+}  // namespace brotli