summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/jpeg-xl/lib/jpegli/entropy_coding.cc')
-rw-r--r--third_party/jpeg-xl/lib/jpegli/entropy_coding.cc837
1 files changed, 837 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc b/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc
new file mode 100644
index 0000000000..7e50bbc3a7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc
@@ -0,0 +1,837 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/entropy_coding.h"
+
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/entropy_coding-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx,
+ int ac_ctx, Token** tokens_ptr) {
+ ComputeTokensForBlock<coeff_t, true>(block, last_dc, dc_ctx, ac_ctx,
+ tokens_ptr);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {
+ int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+ size_t blocks_per_mcu = 0;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor;
+ }
+ return kDCTBlockSize * blocks_per_mcu * MCUs_per_row;
+}
+
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+ size_t num_tokens, size_t max_per_row) {
+ size_t estimate;
+ if (mcu_y == 0) {
+ estimate = 16 * max_per_row;
+ } else {
+ estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y);
+ }
+ size_t mcus_left = ysize_mcus - mcu_y;
+ return std::min(mcus_left * max_per_row,
+ std::max(max_per_row, estimate - num_tokens));
+}
+
+namespace {
+HWY_EXPORT(ComputeTokensSequential);
+
+void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al,
+ coeff_t* last_dc_coeff, Token** next_token) {
+ coeff_t temp2;
+ coeff_t temp;
+ temp2 = coeffs[0] >> Al;
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ temp2--;
+ }
+ int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
+ int bits = temp2 & ((1 << nbits) - 1);
+ *(*next_token)++ = Token(context, nbits, bits);
+}
+
+void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index,
+ int context, ScanTokenInfo* sti) {
+ jpeg_comp_master* m = cinfo->master;
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const int comp_idx = scan_info->component_index[0];
+ const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ const int Al = scan_info->Al;
+ const int Ss = scan_info->Ss;
+ const int Se = scan_info->Se;
+ const size_t restart_interval = sti->restart_interval;
+ int restarts_to_go = restart_interval;
+ size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
+ size_t num_restarts =
+ restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
+ size_t restart_idx = 0;
+ int eob_run = 0;
+ TokenArray* ta = &m->token_arrays[m->cur_token_array];
+ sti->token_offset = m->total_num_tokens + ta->num_tokens;
+ sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
+ for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
+ 1, false);
+ // Each coefficient can appear in at most one token, but we have to reserve
+ // one extra EOBrun token that was rolled over from the previous block-row
+ // and has to be flushed at the end.
+ int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1);
+ if (ta->num_tokens + max_tokens_per_row > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens =
+ EstimateNumTokens(cinfo, by, comp->height_in_blocks,
+ m->total_num_tokens, max_tokens_per_row);
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ if (eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ =
+ Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ eob_run = 0;
+ }
+ ta->num_tokens = m->next_token - ta->tokens;
+ sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
+ restarts_to_go = restart_interval;
+ }
+ const coeff_t* block = &ba[0][bx][0];
+ coeff_t temp2;
+ coeff_t temp;
+ int r = 0;
+ int num_nzeros = 0;
+ int num_future_nzeros = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ if ((temp = block[k]) == 0) {
+ r++;
+ continue;
+ }
+ if (temp < 0) {
+ temp = -temp;
+ temp >>= Al;
+ temp2 = ~temp;
+ } else {
+ temp >>= Al;
+ temp2 = temp;
+ }
+ if (temp == 0) {
+ r++;
+ num_future_nzeros++;
+ continue;
+ }
+ if (eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ =
+ Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ eob_run = 0;
+ }
+ while (r > 15) {
+ *m->next_token++ = Token(context, 0xf0, 0);
+ r -= 16;
+ }
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int symbol = (r << 4u) + nbits;
+ *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1));
+ ++num_nzeros;
+ r = 0;
+ }
+ if (r > 0) {
+ ++eob_run;
+ if (eob_run == 0x7FFF) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ =
+ Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ eob_run = 0;
+ }
+ }
+ sti->num_nonzeros += num_nzeros;
+ sti->num_future_nonzeros += num_future_nzeros;
+ --restarts_to_go;
+ }
+ ta->num_tokens = m->next_token - ta->tokens;
+ }
+ if (eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+ int symbol = nbits << 4u;
+ *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1));
+ ++ta->num_tokens;
+ eob_run = 0;
+ }
+ sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset;
+ sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
+}
+
+void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index,
+ ScanTokenInfo* sti) {
+ jpeg_comp_master* m = cinfo->master;
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const int comp_idx = scan_info->component_index[0];
+ const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ const int Al = scan_info->Al;
+ const int Ss = scan_info->Ss;
+ const int Se = scan_info->Se;
+ const size_t restart_interval = sti->restart_interval;
+ int restarts_to_go = restart_interval;
+ RefToken token;
+ int eob_run = 0;
+ int eob_refbits = 0;
+ size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
+ size_t num_restarts =
+ restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
+ sti->tokens = m->next_refinement_token;
+ sti->refbits = m->next_refinement_bit;
+ sti->eobruns = Allocate<uint16_t>(cinfo, num_blocks / 2, JPOOL_IMAGE);
+ sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
+ RefToken* next_token = sti->tokens;
+ RefToken* next_eob_token = next_token;
+ uint8_t* next_ref_bit = sti->refbits;
+ uint16_t* next_eobrun = sti->eobruns;
+ size_t restart_idx = 0;
+ for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
+ 1, false);
+ for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ sti->restarts[restart_idx++] = next_token - sti->tokens;
+ restarts_to_go = restart_interval;
+ next_eob_token = next_token;
+ eob_run = eob_refbits = 0;
+ }
+ const coeff_t* block = &ba[0][bx][0];
+ int num_eob_refinement_bits = 0;
+ int num_refinement_bits = 0;
+ int num_nzeros = 0;
+ int r = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ int absval = block[k];
+ if (absval == 0) {
+ r++;
+ continue;
+ }
+ const int mask = absval >> (8 * sizeof(int) - 1);
+ absval += mask;
+ absval ^= mask;
+ absval >>= Al;
+ if (absval == 0) {
+ r++;
+ continue;
+ }
+ while (r > 15) {
+ token.symbol = 0xf0;
+ token.refbits = num_refinement_bits;
+ *next_token++ = token;
+ r -= 16;
+ num_eob_refinement_bits += num_refinement_bits;
+ num_refinement_bits = 0;
+ }
+ if (absval > 1) {
+ *next_ref_bit++ = absval & 1u;
+ ++num_refinement_bits;
+ continue;
+ }
+ int symbol = (r << 4u) + 1 + ((mask + 1) << 1);
+ token.symbol = symbol;
+ token.refbits = num_refinement_bits;
+ *next_token++ = token;
+ ++num_nzeros;
+ num_refinement_bits = 0;
+ num_eob_refinement_bits = 0;
+ r = 0;
+ next_eob_token = next_token;
+ eob_run = eob_refbits = 0;
+ }
+ if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) {
+ ++eob_run;
+ eob_refbits += num_eob_refinement_bits + num_refinement_bits;
+ if (eob_refbits > 255) {
+ ++next_eob_token;
+ eob_refbits = num_eob_refinement_bits + num_refinement_bits;
+ eob_run = 1;
+ }
+ next_token = next_eob_token;
+ next_token->refbits = eob_refbits;
+ if (eob_run == 1) {
+ next_token->symbol = 0;
+ } else if (eob_run == 2) {
+ next_token->symbol = 16;
+ *next_eobrun++ = 0;
+ } else if ((eob_run & (eob_run - 1)) == 0) {
+ next_token->symbol += 16;
+ next_eobrun[-1] = 0;
+ } else {
+ ++next_eobrun[-1];
+ }
+ ++next_token;
+ if (eob_run == 0x7fff) {
+ next_eob_token = next_token;
+ eob_run = eob_refbits = 0;
+ }
+ }
+ sti->num_nonzeros += num_nzeros;
+ --restarts_to_go;
+ }
+ }
+ sti->num_tokens = next_token - sti->tokens;
+ sti->restarts[restart_idx++] = sti->num_tokens;
+ m->next_refinement_token = next_token;
+ m->next_refinement_bit = next_ref_bit;
+}
+
+void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset,
+ ScanTokenInfo* sti) {
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ if (scan_info->Ss > 0) {
+ if (scan_info->Ah == 0) {
+ TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti);
+ } else {
+ TokenizeACRefinementScan(cinfo, scan_index, sti);
+ }
+ return;
+ }
+
+ jpeg_comp_master* m = cinfo->master;
+ size_t restart_interval = sti->restart_interval;
+ int restarts_to_go = restart_interval;
+ coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+
+ // "Non-interleaved" means color data comes in separate scans, in other words
+ // each scan can contain only one color component.
+ const bool is_interleaved = (scan_info->comps_in_scan > 1);
+ const bool is_progressive = cinfo->progressive_mode;
+ const int Ah = scan_info->Ah;
+ const int Al = scan_info->Al;
+ HWY_ALIGN constexpr coeff_t kSinkBlock[DCTSIZE2] = {0};
+
+ size_t restart_idx = 0;
+ TokenArray* ta = &m->token_arrays[m->cur_token_array];
+ sti->token_offset = Ah > 0 ? 0 : m->total_num_tokens + ta->num_tokens;
+
+ if (Ah > 0) {
+ sti->refbits = Allocate<uint8_t>(cinfo, sti->num_blocks, JPOOL_IMAGE);
+ } else if (cinfo->progressive_mode) {
+ if (ta->num_tokens + sti->num_blocks > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens = sti->num_blocks;
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ }
+
+ JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+ size_t block_idx = 0;
+ for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) {
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int by0 = mcu_y * n_blocks_y;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(n_blocks_y, block_rows_left);
+ ba[i] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
+ by0, max_block_rows, false);
+ }
+ if (!cinfo->progressive_mode) {
+ int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+ if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
+ if (ta->tokens) {
+ m->total_num_tokens += ta->num_tokens;
+ ++m->cur_token_array;
+ ta = &m->token_arrays[m->cur_token_array];
+ }
+ m->num_tokens =
+ EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan,
+ m->total_num_tokens, max_tokens_per_mcu_row);
+ ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
+ m->next_token = ta->tokens;
+ }
+ }
+ for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) {
+ // Possibly emit a restart marker.
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ restarts_to_go = restart_interval;
+ memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
+ ta->num_tokens = m->next_token - ta->tokens;
+ sti->restarts[restart_idx++] =
+ Ah > 0 ? block_idx : m->total_num_tokens + ta->num_tokens;
+ }
+ // Encode one MCU
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
+ for (int iy = 0; iy < n_blocks_y; ++iy) {
+ for (int ix = 0; ix < n_blocks_x; ++ix) {
+ size_t block_y = mcu_y * n_blocks_y + iy;
+ size_t block_x = mcu_x * n_blocks_x + ix;
+ const coeff_t* block;
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ block = kSinkBlock;
+ } else {
+ block = &ba[i][iy][block_x][0];
+ }
+ if (!is_progressive) {
+ HWY_DYNAMIC_DISPATCH(ComputeTokensSequential)
+ (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i,
+ &m->next_token);
+ last_dc_coeff[i] = block[0];
+ } else {
+ if (Ah == 0) {
+ TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i,
+ &m->next_token);
+ } else {
+ sti->refbits[block_idx] = (block[0] >> Al) & 1;
+ }
+ }
+ ++block_idx;
+ }
+ }
+ }
+ --restarts_to_go;
+ }
+ ta->num_tokens = m->next_token - ta->tokens;
+ }
+ JXL_DASSERT(block_idx == sti->num_blocks);
+ sti->num_tokens =
+ Ah > 0 ? sti->num_blocks
+ : m->total_num_tokens + ta->num_tokens - sti->token_offset;
+ sti->restarts[restart_idx++] =
+ Ah > 0 ? sti->num_blocks : m->total_num_tokens + ta->num_tokens;
+ if (Ah == 0 && cinfo->progressive_mode) {
+ JXL_DASSERT(sti->num_blocks == sti->num_tokens);
+ }
+}
+
+} // namespace
+
+void TokenizeJpeg(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ std::vector<int> processed(cinfo->num_scans);
+ size_t max_refinement_tokens = 0;
+ size_t num_refinement_bits = 0;
+ int num_refinement_scans[DCTSIZE2] = {};
+ int max_num_refinement_scans = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info* si = &cinfo->scan_info[i];
+ ScanTokenInfo* sti = &m->scan_token_info[i];
+ if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) {
+ int offset = m->ac_ctx_offset[i];
+ TokenizeScan(cinfo, i, offset, sti);
+ processed[i] = 1;
+ max_refinement_tokens += sti->num_future_nonzeros;
+ for (int k = si->Ss; k <= si->Se; ++k) {
+ num_refinement_scans[k] = si->Al;
+ }
+ max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al);
+ num_refinement_bits += sti->num_nonzeros;
+ }
+ if (si->Ss > 0 && si->Ah > 0) {
+ int comp_idx = si->component_index[0];
+ const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks;
+ max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks;
+ }
+ }
+ if (max_refinement_tokens > 0) {
+ m->next_refinement_token =
+ Allocate<RefToken>(cinfo, max_refinement_tokens, JPOOL_IMAGE);
+ }
+ for (int j = 0; j < max_num_refinement_scans; ++j) {
+ uint8_t* refinement_bits =
+ Allocate<uint8_t>(cinfo, num_refinement_bits, JPOOL_IMAGE);
+ m->next_refinement_bit = refinement_bits;
+ size_t new_refinement_bits = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info* si = &cinfo->scan_info[i];
+ ScanTokenInfo* sti = &m->scan_token_info[i];
+ if (si->Ss > 0 && si->Ah > 0 &&
+ si->Ah == num_refinement_scans[si->Ss] - j) {
+ int offset = m->ac_ctx_offset[i];
+ TokenizeScan(cinfo, i, offset, sti);
+ processed[i] = 1;
+ new_refinement_bits += sti->num_nonzeros;
+ }
+ }
+ JXL_DASSERT(m->next_refinement_bit ==
+ refinement_bits + num_refinement_bits);
+ num_refinement_bits += new_refinement_bits;
+ }
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ if (processed[i]) {
+ continue;
+ }
+ int offset = m->ac_ctx_offset[i];
+ TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]);
+ processed[i] = 1;
+ }
+}
+
+namespace {
+
+struct Histogram {
+ int count[kJpegHuffmanAlphabetSize];
+ Histogram() { memset(count, 0, sizeof(count)); }
+};
+
+void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) {
+ jpeg_comp_master* m = cinfo->master;
+ size_t num_token_arrays = m->cur_token_array + 1;
+ for (size_t i = 0; i < num_token_arrays; ++i) {
+ Token* tokens = m->token_arrays[i].tokens;
+ size_t num_tokens = m->token_arrays[i].num_tokens;
+ for (size_t j = 0; j < num_tokens; ++j) {
+ Token t = tokens[j];
+ ++histograms[t.context].count[t.symbol];
+ }
+ }
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info& si = cinfo->scan_info[i];
+ const ScanTokenInfo& sti = m->scan_token_info[i];
+ if (si.Ss > 0 && si.Ah > 0) {
+ int context = m->ac_ctx_offset[i];
+ int* ac_histo = &histograms[context].count[0];
+ for (size_t j = 0; j < sti.num_tokens; ++j) {
+ ++ac_histo[sti.tokens[j].symbol & 253];
+ }
+ }
+ }
+}
+
+struct JpegClusteredHistograms {
+ std::vector<Histogram> histograms;
+ std::vector<uint32_t> histogram_indexes;
+ std::vector<uint32_t> slot_ids;
+};
+
+float HistogramCost(const Histogram& histo) {
+ std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
+ std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ counts[i] = histo.count[i];
+ }
+ counts[kJpegHuffmanAlphabetSize] = 1;
+ CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+ &depths[0]);
+ size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8;
+ size_t data_bits = 0;
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ if (depths[i] > 0) {
+ header_bits += 8;
+ data_bits += counts[i] * depths[i];
+ }
+ }
+ return header_bits + data_bits;
+}
+
+void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) {
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ c->count[i] = a.count[i] + b.count[i];
+ }
+}
+
+bool IsEmptyHistogram(const Histogram& histo) {
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ if (histo.count[i]) return false;
+ }
+ return true;
+}
+
+void ClusterJpegHistograms(const Histogram* histograms, size_t num,
+ JpegClusteredHistograms* clusters) {
+ clusters->histogram_indexes.resize(num);
+ std::vector<uint32_t> slot_histograms;
+ std::vector<float> slot_costs;
+ for (size_t i = 0; i < num; ++i) {
+ const Histogram& cur = histograms[i];
+ if (IsEmptyHistogram(cur)) {
+ continue;
+ }
+ float best_cost = HistogramCost(cur);
+ size_t best_slot = slot_histograms.size();
+ for (size_t j = 0; j < slot_histograms.size(); ++j) {
+ size_t prev_idx = slot_histograms[j];
+ const Histogram& prev = clusters->histograms[prev_idx];
+ Histogram combined;
+ AddHistograms(prev, cur, &combined);
+ float combined_cost = HistogramCost(combined);
+ float cost = combined_cost - slot_costs[j];
+ if (cost < best_cost) {
+ best_cost = cost;
+ best_slot = j;
+ }
+ }
+ if (best_slot == slot_histograms.size()) {
+ // Create new histogram.
+ size_t histogram_index = clusters->histograms.size();
+ clusters->histograms.push_back(cur);
+ clusters->histogram_indexes[i] = histogram_index;
+ if (best_slot < 4) {
+ // We have a free slot, so we put the new histogram there.
+ slot_histograms.push_back(histogram_index);
+ slot_costs.push_back(best_cost);
+ } else {
+ // TODO(szabadka) Find the best histogram to replce.
+ best_slot = (clusters->slot_ids.back() + 1) % 4;
+ }
+ slot_histograms[best_slot] = histogram_index;
+ slot_costs[best_slot] = best_cost;
+ clusters->slot_ids.push_back(best_slot);
+ } else {
+ // Merge this histogram with a previous one.
+ size_t histogram_index = slot_histograms[best_slot];
+ const Histogram& prev = clusters->histograms[histogram_index];
+ AddHistograms(prev, cur, &clusters->histograms[histogram_index]);
+ clusters->histogram_indexes[i] = histogram_index;
+ JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot);
+ slot_costs[best_slot] += best_cost;
+ }
+ }
+}
+
+void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc,
+ int* inv_slot_map, uint8_t* slot_id_map,
+ JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) {
+ const char* type = is_dc ? "DC" : "AC";
+ if (index < 0 || index >= NUM_HUFF_TBLS) {
+ JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index);
+ }
+ // Check if we have already copied this Huffman table.
+ int slot_idx = index + (is_dc ? 0 : NUM_HUFF_TBLS);
+ if (inv_slot_map[slot_idx] != -1) {
+ return;
+ }
+ inv_slot_map[slot_idx] = *num_huffman_tables;
+ // Look up and validate Huffman table.
+ JHUFF_TBL* table =
+ is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index];
+ if (table == nullptr) {
+ JPEGLI_ERROR("Missing %s Huffman table %d", type, index);
+ }
+ ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc);
+ // Copy Huffman table to the end of the list and save slot id.
+ slot_id_map[*num_huffman_tables] = index + (is_dc ? 0 : 0x10);
+ memcpy(&huffman_tables[*num_huffman_tables], table, sizeof(JHUFF_TBL));
+ ++(*num_huffman_tables);
+}
+
+void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) {
+ std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
+ std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
+ for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) {
+ counts[j] = histo.count[j];
+ }
+ counts[kJpegHuffmanAlphabetSize] = 1;
+ CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+ &depths[0]);
+ memset(table, 0, sizeof(JHUFF_TBL));
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ if (depths[i] > 0) {
+ ++table->bits[depths[i]];
+ }
+ }
+ int offset[kJpegHuffmanMaxBitLength + 1] = {0};
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ offset[i] = offset[i - 1] + table->bits[i - 1];
+ }
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ if (depths[i] > 0) {
+ table->huffval[offset[depths[i]]++] = i;
+ }
+ }
+}
+
+} // namespace
+
+void CopyHuffmanTables(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ size_t max_huff_tables = 2 * cinfo->num_components;
+ // Copy Huffman tables and save slot ids.
+ m->huffman_tables = Allocate<JHUFF_TBL>(cinfo, max_huff_tables, JPOOL_IMAGE);
+ m->slot_id_map = Allocate<uint8_t>(cinfo, max_huff_tables, JPOOL_IMAGE);
+ m->num_huffman_tables = 0;
+ int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0],
+ m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
+ CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0],
+ m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
+ }
+ // Compute context map.
+ m->context_map = Allocate<uint8_t>(cinfo, 8, JPOOL_IMAGE);
+ memset(m->context_map, 0, 8);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no];
+ }
+ int ac_ctx = 4;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info* si = &cinfo->scan_info[i];
+ if (si->Se > 0) {
+ for (int j = 0; j < si->comps_in_scan; ++j) {
+ int c = si->component_index[j];
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4];
+ }
+ }
+ }
+}
+
+void OptimizeHuffmanCodes(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ // Build DC and AC histograms.
+ std::vector<Histogram> histograms(m->num_contexts);
+ BuildHistograms(cinfo, &histograms[0]);
+
+ // Cluster DC histograms.
+ JpegClusteredHistograms dc_clusters;
+ ClusterJpegHistograms(histograms.data(), cinfo->num_components, &dc_clusters);
+
+ // Cluster AC histograms.
+ JpegClusteredHistograms ac_clusters;
+ ClusterJpegHistograms(histograms.data() + 4, m->num_contexts - 4,
+ &ac_clusters);
+
+ // Create Huffman tables and slot ids clusters.
+ size_t num_dc_huff = dc_clusters.histograms.size();
+ m->num_huffman_tables = num_dc_huff + ac_clusters.histograms.size();
+ m->huffman_tables =
+ Allocate<JHUFF_TBL>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+ m->slot_id_map = Allocate<uint8_t>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+ for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+ JHUFF_TBL huff_table = {};
+ if (i < dc_clusters.histograms.size()) {
+ m->slot_id_map[i] = i;
+ BuildJpegHuffmanTable(dc_clusters.histograms[i], &huff_table);
+ } else {
+ m->slot_id_map[i] = 16 + ac_clusters.slot_ids[i - num_dc_huff];
+ BuildJpegHuffmanTable(ac_clusters.histograms[i - num_dc_huff],
+ &huff_table);
+ }
+ memcpy(&m->huffman_tables[i], &huff_table, sizeof(huff_table));
+ }
+
+ // Create context map from clustered histogram indexes.
+ m->context_map = Allocate<uint8_t>(cinfo, m->num_contexts, JPOOL_IMAGE);
+ memset(m->context_map, 0, m->num_contexts);
+ for (size_t i = 0; i < m->num_contexts; ++i) {
+ if (i < (size_t)cinfo->num_components) {
+ m->context_map[i] = dc_clusters.histogram_indexes[i];
+ } else if (i >= 4) {
+ m->context_map[i] = num_dc_huff + ac_clusters.histogram_indexes[i - 4];
+ }
+ }
+}
+
+namespace {
+
+constexpr uint8_t kNumExtraBits[256] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
+};
+
+void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) {
+ int huff_code[kJpegHuffmanAlphabetSize];
+ // +1 for a sentinel element.
+ uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+ int p = 0;
+ for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+ int i = table.bits[l];
+ while (i--) huff_size[p++] = l;
+ }
+
+ // Reuse sentinel element.
+ int last_p = p;
+ huff_size[last_p] = 0;
+
+ int next_code = 0;
+ uint32_t si = huff_size[0];
+ p = 0;
+ while (huff_size[p]) {
+ while ((huff_size[p]) == si) {
+ huff_code[p++] = next_code;
+ next_code++;
+ }
+ next_code <<= 1;
+ si++;
+ }
+ for (p = 0; p < last_p; p++) {
+ int i = table.huffval[p];
+ int nbits = kNumExtraBits[i];
+ code->depth[i] = huff_size[p] + nbits;
+ code->code[i] = huff_code[p] << nbits;
+ }
+}
+
+} // namespace
+
+void InitEntropyCoder(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ m->coding_tables =
+ Allocate<HuffmanCodeTable>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
+ for (size_t i = 0; i < m->num_huffman_tables; ++i) {
+ BuildHuffmanCodeTable(m->huffman_tables[i], &m->coding_tables[i]);
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE