summary refs log tree commit diff stats
path: root/third_party/jpeg-xl/lib/jpegli/bitstream.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/jpeg-xl/lib/jpegli/bitstream.cc')
-rw-r--r-- third_party/jpeg-xl/lib/jpegli/bitstream.cc 1136
1 files changed, 1136 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jpegli/bitstream.cc b/third_party/jpeg-xl/lib/jpegli/bitstream.cc
new file mode 100644
index 0000000000..0313ed3071
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/bitstream.cc
@@ -0,0 +1,1136 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bitstream.h"
+
+#include <cmath>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/bitstream.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Compress;
+using hwy::HWY_NAMESPACE::CountTrue;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::MaskFromVec;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Not;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Shl;
+using hwy::HWY_NAMESPACE::Sub;
+
+using DI = HWY_FULL(int32_t);  // int32_t descriptor type (HWY_FULL) used to select the Highway overloads below.
+constexpr DI di;  // Shared descriptor instance used by CompactBlock() and ComputeSymbols(); NumNonZero8x8ExceptDC() shadows it with a local one.
+
+// Returns the number of non-zero coefficients of the 8x8 |block|, not counting
+// the DC coefficient at index 0.
+int NumNonZero8x8ExceptDC(const coeff_t* block) {
+  const HWY_CAPPED(coeff_t, 8) di;
+  const size_t lanes = Lanes(di);
+  const auto zero = Zero(di);
+
+  // Only the first entry is set; it selects the DC coefficient of row 0.
+  HWY_ALIGN const coeff_t dc_mask_lanes[8] = {-1};
+
+  // Each coefficient that compares equal to zero adds an all-ones lane (-1),
+  // so this accumulates minus the number of zeros.
+  auto minus_num_zeros = zero;
+  for (size_t y = 0; y < 8; ++y) {
+    for (size_t x = 0; x < 8; x += lanes) {
+      auto coef = Load(di, &block[y * 8 + x]);
+      if (y == 0) {
+        // Clear the DC lane so that it is always counted as a zero.
+        coef = AndNot(Load(di, dc_mask_lanes + x), coef);
+      }
+      minus_num_zeros = Add(minus_num_zeros, VecFromMask(di, Eq(coef, zero)));
+    }
+  }
+
+  // 64 - #zeros, written as an addition since the sum is already negated.
+  return kDCTBlockSize + GetLane(SumOfLanes(di, minus_num_zeros));
+}
+
+// Reorders the DCTSIZE2 coefficients of |block| in place: output position k
+// receives the input coefficient at index kJPEGNaturalOrder[k].
+void ZigZagShuffle(int32_t* JXL_RESTRICT block) {
+  // TODO(szabadka) SIMDify this.
+  int32_t reordered[DCTSIZE2];
+  int k = 0;
+  while (k < DCTSIZE2) {
+    reordered[k] = block[kJPEGNaturalOrder[k]];
+    ++k;
+  }
+  memcpy(block, reordered, sizeof(reordered));
+}
+
+// Returns, per lane, the 1-based position of the highest set bit among the 12
+// lowest bits of |x|, or 0 if none of those bits is set.
+//
+// Each term below is (x & bit) - bias. The bias is chosen so that a set bit
+// number n yields exactly n while a clear bit yields a value <= 0, hence the
+// maximum over all terms is the position of the top set bit.
+template <typename DI, class V>
+JXL_INLINE V NumBits(DI di, const V x) {
+  // TODO(szabadka) Add faster implementations for some specific architectures.
+  auto nbits = And(x, Set(di, 1));
+  nbits = Max(nbits, And(x, Set(di, 2)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 4)), Set(di, 1)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 8)), Set(di, 4)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 16)), Set(di, 11)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 32)), Set(di, 26)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 64)), Set(di, 57)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 128)), Set(di, 120)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 256)), Set(di, 247)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 512)), Set(di, 502)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 1024)), Set(di, 1013)));
+  nbits = Max(nbits, Sub(And(x, Set(di, 2048)), Set(di, 2036)));
+  return nbits;
+}
+
+// kIndexes[k] == 16 * k: the coefficient position pre-multiplied by 16, so
+// that differences of entries can be used directly in the symbol calculation.
+HWY_ALIGN constexpr int32_t kIndexes[64] = {
+    0,   16,  32,  48,  64,  80,  96,  112,  //
+    128, 144, 160, 176, 192, 208, 224, 240,  //
+    256, 272, 288, 304, 320, 336, 352, 368,  //
+    384, 400, 416, 432, 448, 464, 480, 496,  //
+    512, 528, 544, 560, 576, 592, 608, 624,  //
+    640, 656, 672, 688, 704, 720, 736, 752,  //
+    768, 784, 800, 816, 832, 848, 864, 880,  //
+    896, 912, 928, 944, 960, 976, 992, 1008,
+};
+
+// Moves the DC coefficient and every non-zero AC coefficient of |block| to the
+// front of |block|, keeping their order, and stores the matching kIndexes
+// entries (coefficient position times 16) at the same positions of
+// |nonzero_idx|. Returns the number of coefficients kept; the DC coefficient
+// is always kept, even when it is zero.
+JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block,
+                            int32_t* JXL_RESTRICT nonzero_idx) {
+  const auto zero = Zero(di);
+  // Only lane 0 is set; it forces the DC coefficient to be kept.
+  HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1};
+  const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes));
+  int num_kept = 0;
+  for (int k = 0; k < DCTSIZE2; k += Lanes(di)) {
+    const auto coef = Load(di, &block[k]);
+    const auto idx = Load(di, &kIndexes[k]);
+    const auto is_nonzero = Not(Eq(coef, zero));
+    // The DC mask only applies to the first vector of the block.
+    const auto keep = (k == 0) ? Or(dc_mask, is_nonzero) : is_nonzero;
+    // Whole vectors are stored (unaligned) at the current output position,
+    // which never runs ahead of the input position k.
+    StoreU(Compress(coef, keep), di, &block[num_kept]);
+    StoreU(Compress(idx, keep), di, &nonzero_idx[num_kept]);
+    num_kept += CountTrue(di, keep);
+  }
+  return num_kept;
+}
+
+// Converts the compacted coefficients produced by CompactBlock() into entropy
+// coding symbols and extra bits.
+//
+// For every entry i < num_nonzeros (processed a whole vector at a time):
+//   symbols[i] = nbits + nonzero_idx[i] - nonzero_idx[i - 1] - 16
+//   block[i]   = the nbits lowest bits of the coefficient (coefficient - 1
+//                for negative values)
+// where nbits is NumBits(|coefficient|). Note that nonzero_idx[-1] is written,
+// so |nonzero_idx| must not point at the first element of its buffer.
+JXL_INLINE void ComputeSymbols(const int num_nonzeros,
+                               int32_t* JXL_RESTRICT nonzero_idx,
+                               int32_t* JXL_RESTRICT block,
+                               int32_t* JXL_RESTRICT symbols) {
+  // Sentinel "previous index", so that entry 0 gets a zero-run term of 0.
+  nonzero_idx[-1] = -16;
+  const auto one = Set(di, 1);
+  const auto sixteen = Set(di, 16);
+  constexpr int kSignShift = 8 * sizeof(int32_t) - 1;
+  for (int i = 0; i < num_nonzeros; i += Lanes(di)) {
+    const auto cur_idx = Load(di, &nonzero_idx[i]);
+    const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]);
+    const auto coeff = Load(di, &block[i]);
+    const auto nbits = NumBits(di, Abs(coeff));
+    // All ones (-1) for negative coefficients, zero otherwise.
+    const auto sign = ShiftRight<kSignShift>(coeff);
+    const auto low_bits_mask = Sub(Shl(one, nbits), one);
+    const auto bits = And(Add(coeff, sign), low_bits_mask);
+    const auto symbol = Sub(Add(nbits, cur_idx), Add(prev_idx, sixteen));
+    Store(symbol, di, symbols + i);
+    Store(bits, di, block + i);
+  }
+}
+
+// Entropy-codes one coefficient block to |bw|. |block| is overwritten; the
+// |symbols| and |nonzero_idx| buffers are used as scratch space.
+void WriteBlock(int32_t* JXL_RESTRICT block, int32_t* JXL_RESTRICT symbols,
+                int32_t* JXL_RESTRICT nonzero_idx, HuffmanCodeTable* dc_huff,
+                HuffmanCodeTable* ac_huff, JpegBitWriter* bw) {
+  ZigZagShuffle(block);
+  const int num_nonzeros = CompactBlock(block, nonzero_idx);
+  ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols);
+
+  // Entry 0 is the DC coefficient; its extra bits are OR-ed into the code.
+  const int dc_symbol = symbols[0];
+  WriteBits(bw, dc_huff->depth[dc_symbol],
+            dc_huff->code[dc_symbol] | block[0]);
+
+  for (int i = 1; i < num_nonzeros; ++i) {
+    int ac_symbol = symbols[i];
+    // Every 256 in the symbol value is emitted as a separate 0xf0 symbol.
+    for (; ac_symbol > 255; ac_symbol -= 256) {
+      WriteBits(bw, ac_huff->depth[0xf0], ac_huff->code[0xf0]);
+    }
+    WriteBits(bw, ac_huff->depth[ac_symbol],
+              ac_huff->code[ac_symbol] | block[i]);
+  }
+
+  // 1008 is the last kIndexes entry (16 * 63): emit symbol 0 unless the last
+  // kept coefficient is the last coefficient of the block.
+  const int last_idx = nonzero_idx[num_nonzeros - 1];
+  if (last_idx < 1008) {
+    WriteBits(bw, ac_huff->depth[0], ac_huff->code[0]);
+  }
+}
+
+// Computes the coefficient blocks of iMCU row m->next_iMCU_row from the raw
+// pixel rows and writes them to the bit writer, one MCU at a time with the
+// components interleaved.
+void WriteiMCURow(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  JpegBitWriter* bw = &m->bw;
+  const int mcu_y = m->next_iMCU_row;
+  const int xsize_mcus =
+      DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+
+  // Scratch areas inside m->block_tmp: coefficients at offset 0, symbols at
+  // DCTSIZE2 and non-zero indexes at 3 * DCTSIZE2.
+  int32_t* block = m->block_tmp;
+  int32_t* symbols = m->block_tmp + DCTSIZE2;
+  int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2;
+  coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff;
+
+  // First pixel row of this iMCU row for every component.
+  const float* imcu_start[kMaxComponents];
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    const jpeg_component_info& comp = cinfo->comp_info[c];
+    imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp.v_samp_factor * DCTSIZE);
+  }
+
+  const float* qf =
+      m->use_adaptive_quantization ? m->quant_field.Row(0) : nullptr;
+  const size_t qf_stride = m->quant_field.stride();
+
+  for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      // AC tables are stored 4 entries after the DC tables.
+      HuffmanCodeTable* dc_huff = &m->huff_tables[comp->dc_tbl_no];
+      HuffmanCodeTable* ac_huff = &m->huff_tables[comp->ac_tbl_no + 4];
+      float* JXL_RESTRICT qmc = m->quant_mul[c];
+      const size_t stride = m->raw_data[c]->stride();
+      const int h_factor = m->h_factor[c];
+      const float* zero_bias_offset = m->zero_bias_offset[c];
+      const float* zero_bias_mul = m->zero_bias_mul[c];
+      float aq_strength = 0.0f;
+      for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
+        const size_t by = mcu_y * comp->v_samp_factor + iy;
+        for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
+          const size_t bx = mcu_x * comp->h_samp_factor + ix;
+          const bool in_image =
+              bx < comp->width_in_blocks && by < comp->height_in_blocks;
+          if (!in_image) {
+            // Block outside of the component: write symbol 0 of the DC table
+            // followed by symbol 0 of the AC table.
+            WriteBits(bw, dc_huff->depth[0], dc_huff->code[0]);
+            WriteBits(bw, ac_huff->depth[0], ac_huff->code[0]);
+            continue;
+          }
+          if (m->use_adaptive_quantization) {
+            aq_strength = qf[iy * qf_stride + bx * h_factor];
+          }
+          const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE;
+          ComputeCoefficientBlock(pixels, stride, qmc, aq_strength,
+                                  zero_bias_offset, zero_bias_mul,
+                                  m->dct_buffer, block);
+          // Code the DC coefficient as a difference to the previous block of
+          // the same component and remember the new predictor.
+          const int32_t dc_diff = block[0] - last_dc_coeff[c];
+          block[0] = dc_diff;
+          last_dc_coeff[c] += dc_diff;
+          WriteBlock(block, symbols, nonzero_idx, dc_huff, ac_huff, bw);
+        }
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+HWY_EXPORT(NumNonZero8x8ExceptDC);
+
+// Holds data that is buffered between 8x8 blocks in progressive mode.
+//
+// All members have in-class initializers, so a default-constructed state is
+// already empty and well defined even before DCTCodingStateInit() is called.
+struct DCTCodingState {
+  // The run length of end-of-band symbols in a progressive scan.
+  int eob_run_ = 0;
+  // The huffman table to be used when flushing the state.
+  HuffmanCodeTable* cur_ac_huff_ = nullptr;
+  // The sequence of currently buffered refinement bits for a successive
+  // approximation scan (one where Ah > 0).
+  std::vector<int> refinement_bits_;
+};
+
+// Resets |s| to the empty state (no pending end-of-band run, no table, no
+// buffered bits) and reserves room for kJPEGMaxCorrectionBits refinement bits.
+void DCTCodingStateInit(DCTCodingState* s) {
+  std::vector<int>& pending_bits = s->refinement_bits_;
+  pending_bits.clear();
+  pending_bits.reserve(kJPEGMaxCorrectionBits);
+  s->cur_ac_huff_ = nullptr;
+  s->eob_run_ = 0;
+}
+
+// Writes the code that |table| assigns to |symbol| to the bit writer.
+static JXL_INLINE void WriteSymbol(int symbol, const HuffmanCodeTable* table,
+                                   JpegBitWriter* bw) {
+  const auto code_length = table->depth[symbol];
+  const auto code = table->code[symbol];
+  WriteBits(bw, code_length, code);
+}
+
+// Writes everything buffered in |s| to the bit writer and empties |s|: first
+// the pending end-of-band run (coded with s->cur_ac_huff_), then the buffered
+// refinement bits, one bit each.
+static JXL_INLINE void Flush(DCTCodingState* s, JpegBitWriter* bw) {
+  const int eob_run = s->eob_run_;
+  if (eob_run > 0) {
+    // The symbol carries floor(log2(run)) in its high nibble; the bits of the
+    // run below its top bit follow as extra bits.
+    const int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
+    WriteSymbol(nbits << 4u, s->cur_ac_huff_, bw);
+    if (nbits > 0) {
+      WriteBits(bw, nbits, eob_run & ((1 << nbits) - 1));
+    }
+    s->eob_run_ = 0;
+  }
+  for (const int bit : s->refinement_bits_) {
+    WriteBits(bw, 1, bit);
+  }
+  s->refinement_bits_.clear();
+}
+
+// Extends the pending end-of-band run of |s| by one block and appends
+// |new_bits| (may be null) to the buffered refinement bits. The state is
+// flushed right away once the run reaches 0x7FFF or the number of buffered
+// bits exceeds kJPEGMaxCorrectionBits - kDCTBlockSize + 1.
+static JXL_INLINE void BufferEndOfBand(DCTCodingState* s,
+                                       HuffmanCodeTable* ac_huff,
+                                       const std::vector<int>* new_bits,
+                                       JpegBitWriter* bw) {
+  // The table of the block that starts the run is the one used by Flush().
+  if (s->eob_run_ == 0) {
+    s->cur_ac_huff_ = ac_huff;
+  }
+  s->eob_run_ += 1;
+  if (new_bits != nullptr) {
+    s->refinement_bits_.insert(s->refinement_bits_.end(), new_bits->begin(),
+                               new_bits->end());
+  }
+  const bool run_is_full = s->eob_run_ == 0x7FFF;
+  const bool bits_are_full =
+      s->refinement_bits_.size() > kJPEGMaxCorrectionBits - kDCTBlockSize + 1;
+  if (run_is_full || bits_are_full) {
+    Flush(s, bw);
+  }
+}
+
+// Fills |table| with the code lengths and code words described by |huff|.
+//
+// The last code of |huff| is treated as a sentinel and gets no table entry.
+// With |pre_shifted| set, each entry is widened by the low nibble of its
+// symbol value: the depth grows by that many bits and the code is shifted
+// left by the same amount. Returns false if |huff| describes more than
+// kJpegHuffmanAlphabetSize + 1 codes, true otherwise (also for an empty
+// |huff|, in which case |table| is left untouched).
+bool BuildHuffmanCodeTable(const JPEGHuffmanCode& huff, HuffmanCodeTable* table,
+                           bool pre_shifted = false) {
+  int huff_code[kJpegHuffmanAlphabetSize];
+  // +1 for a sentinel element.
+  uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+
+  // Expand the per-length counts into one code length per code.
+  int num_codes = 0;
+  for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+    const int count = huff.counts[len];
+    if (num_codes + count > kJpegHuffmanAlphabetSize + 1) {
+      return false;
+    }
+    for (int left = count; left != 0; --left) {
+      huff_size[num_codes++] = len;
+    }
+  }
+  if (num_codes == 0) {
+    return true;
+  }
+
+  // Reuse sentinel element: a length of 0 terminates the loop below.
+  const int last = num_codes - 1;
+  huff_size[last] = 0;
+
+  // Assign consecutive code words within each length and append a zero bit
+  // whenever the length increases.
+  int code = 0;
+  uint32_t cur_len = huff_size[0];
+  int p = 0;
+  while (huff_size[p] != 0) {
+    for (; huff_size[p] == cur_len; ++p) {
+      huff_code[p] = code++;
+    }
+    code <<= 1;
+    ++cur_len;
+  }
+
+  // Scatter the codes into the table, indexed by symbol value.
+  for (p = 0; p < last; ++p) {
+    const int value = huff.values[p];
+    table->depth[value] = huff_size[p];
+    table->code[value] = huff_code[p];
+    if (pre_shifted) {
+      const int extra_bits = value & 0xf;
+      table->depth[value] += extra_bits;
+      table->code[value] <<= extra_bits;
+    }
+  }
+  return true;
+}
+
+// Entropy-codes one block of a sequential scan: the DC coefficient as a
+// difference to |*last_dc_coeff| (which is updated), then the AC coefficients
+// in kJPEGNaturalOrder as (zero run, size) symbols followed by extra bits.
+// Always returns true.
+bool EncodeDCTBlockSequential(const coeff_t* block, HuffmanCodeTable* dc_huff,
+                              HuffmanCodeTable* ac_huff, coeff_t* last_dc_coeff,
+                              JpegBitWriter* bw) {
+  const coeff_t dc = block[0];
+  const coeff_t dc_diff = dc - *last_dc_coeff;
+  if (dc_diff == 0) {
+    WriteSymbol(0, dc_huff, bw);
+  } else {
+    *last_dc_coeff = dc;
+    coeff_t magnitude = dc_diff;
+    coeff_t extra = dc_diff;
+    if (dc_diff < 0) {
+      magnitude = -magnitude;
+      // Negative values are coded as (value - 1) in the low bits.
+      extra--;
+    }
+    const int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(magnitude) + 1;
+    const int dc_mask = (1 << dc_nbits) - 1;
+    WriteSymbol(dc_nbits, dc_huff, bw);
+    WriteBits(bw, dc_nbits, extra & dc_mask);
+  }
+
+  int num_nonzeros = HWY_DYNAMIC_DISPATCH(NumNonZero8x8ExceptDC)(block);
+  for (int k = 1; k < 64; ++k) {
+    if (num_nonzeros == 0) {
+      // Only zeros are left: symbol 0 ends the block.
+      WriteSymbol(0, ac_huff, bw);
+      break;
+    }
+    // Skip to the next non-zero coefficient; one exists because
+    // num_nonzeros > 0.
+    int run = 0;
+    coeff_t coef = block[kJPEGNaturalOrder[k]];
+    while (coef == 0) {
+      ++run;
+      ++k;
+      coef = block[kJPEGNaturalOrder[k]];
+    }
+    --num_nonzeros;
+    const coeff_t magnitude = (coef < 0) ? -coef : coef;
+    const coeff_t extra = (coef < 0) ? ~magnitude : magnitude;
+    // Runs of 16 or more zeros are split off as 0xf0 symbols.
+    for (; run > 15; run -= 16) {
+      WriteSymbol(0xf0, ac_huff, bw);
+    }
+    const int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(magnitude) + 1;
+    const int ac_mask = (1 << ac_nbits) - 1;
+    WriteSymbol((run << 4u) + ac_nbits, ac_huff, bw);
+    WriteBits(bw, ac_nbits, extra & ac_mask);
+  }
+  return true;
+}
+
+// Entropy-codes the coefficients [Ss, Se] of one block, shifted right by |Al|,
+// for a progressive scan. If Ss is 0 the DC coefficient is coded first as a
+// difference to |*last_dc_coeff| (which is updated). A trailing run of zeros
+// is added to the end-of-band run buffered in |coding_state|; the run is
+// flushed immediately when the scan started at Ss == 0. Returns false if
+// negating a value overflows coeff_t, true otherwise.
+bool EncodeDCTBlockProgressive(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+                               HuffmanCodeTable* ac_huff, int Ss, int Se,
+                               int Al, DCTCodingState* coding_state,
+                               coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+  const bool eob_run_allowed = Ss > 0;
+  if (Ss == 0) {
+    const coeff_t dc = coeffs[0] >> Al;
+    coeff_t magnitude = dc - *last_dc_coeff;
+    *last_dc_coeff = dc;
+    coeff_t extra = magnitude;
+    if (magnitude < 0) {
+      magnitude = -magnitude;
+      // Still negative after negation: the value does not fit.
+      if (magnitude < 0) return false;
+      extra--;
+    }
+    const int nbits =
+        (magnitude == 0) ? 0
+                         : (jxl::FloorLog2Nonzero<uint32_t>(magnitude) + 1);
+    WriteSymbol(nbits, dc_huff, bw);
+    if (nbits > 0) {
+      WriteBits(bw, nbits, extra & ((1 << nbits) - 1));
+    }
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+
+  int run = 0;
+  for (int k = Ss; k <= Se; ++k) {
+    coeff_t magnitude = coeffs[kJPEGNaturalOrder[k]];
+    if (magnitude == 0) {
+      ++run;
+      continue;
+    }
+    const bool negative = magnitude < 0;
+    if (negative) {
+      magnitude = -magnitude;
+      if (magnitude < 0) return false;
+    }
+    magnitude >>= Al;
+    if (magnitude == 0) {
+      // Became zero at this precision; counts as part of the zero run.
+      ++run;
+      continue;
+    }
+    const coeff_t extra = negative ? ~magnitude : magnitude;
+    // A pending end-of-band run has to be written before this coefficient.
+    Flush(coding_state, bw);
+    for (; run > 15; run -= 16) {
+      WriteSymbol(0xf0, ac_huff, bw);
+    }
+    const int nbits = jxl::FloorLog2Nonzero<uint32_t>(magnitude) + 1;
+    WriteSymbol((run << 4u) + nbits, ac_huff, bw);
+    WriteBits(bw, nbits, extra & ((1 << nbits) - 1));
+    run = 0;
+  }
+  if (run > 0) {
+    BufferEndOfBand(coding_state, ac_huff, nullptr, bw);
+    if (!eob_run_allowed) {
+      Flush(coding_state, bw);
+    }
+  }
+  return true;
+}
+
+// Codes one more bit of precision (bit |Al|) for the coefficients [Ss, Se] of
+// one block in a successive approximation scan. If Ss is 0, bit |Al| of the
+// DC coefficient is written first. Coefficients whose shifted magnitude is
+// exactly 1 are coded as newly non-zero; those with a larger magnitude
+// contribute one refinement bit. Whatever remains after the last newly
+// non-zero coefficient is handed to BufferEndOfBand(). Always returns true.
+bool EncodeRefinementBits(const coeff_t* coeffs, HuffmanCodeTable* ac_huff,
+                          int Ss, int Se, int Al, DCTCodingState* coding_state,
+                          JpegBitWriter* bw) {
+  const bool eob_run_allowed = Ss > 0;
+  if (Ss == 0) {
+    // Emit next bit of DC component.
+    WriteBits(bw, 1, (coeffs[0] >> Al) & 1);
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+
+  // Shifted magnitudes, and the position of the last newly non-zero
+  // coefficient (0 if there is none).
+  int abs_values[kDCTBlockSize];
+  int eob = 0;
+  for (int k = Ss; k <= Se; k++) {
+    const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+    abs_values[k] = abs_val >> Al;
+    if (abs_values[k] == 1) {
+      eob = k;
+    }
+  }
+
+  std::vector<int> refinement_bits;
+  refinement_bits.reserve(kDCTBlockSize);
+  // Writes the refinement bits collected so far and forgets them.
+  const auto write_refinement_bits = [&]() {
+    for (int bit : refinement_bits) {
+      WriteBits(bw, 1, bit);
+    }
+    refinement_bits.clear();
+  };
+
+  int run = 0;
+  for (int k = Ss; k <= Se; k++) {
+    const int abs_value = abs_values[k];
+    if (abs_value == 0) {
+      ++run;
+      continue;
+    }
+    // Long zero runs are only split off while a newly non-zero coefficient
+    // still follows.
+    while (run > 15 && k <= eob) {
+      Flush(coding_state, bw);
+      WriteSymbol(0xf0, ac_huff, bw);
+      run -= 16;
+      write_refinement_bits();
+    }
+    if (abs_value > 1) {
+      // Already non-zero in an earlier scan: only its next bit is needed.
+      refinement_bits.push_back(abs_value & 1u);
+      continue;
+    }
+    Flush(coding_state, bw);
+    const int symbol = (run << 4u) + 1;
+    const int new_non_zero_bit = (coeffs[kJPEGNaturalOrder[k]] < 0) ? 0 : 1;
+    WriteSymbol(symbol, ac_huff, bw);
+    WriteBits(bw, 1, new_non_zero_bit);
+    write_refinement_bits();
+    run = 0;
+  }
+  if (run > 0 || !refinement_bits.empty()) {
+    BufferEndOfBand(coding_state, ac_huff, &refinement_bits, bw);
+    if (!eob_run_allowed) {
+      Flush(coding_state, bw);
+    }
+  }
+  return true;
+}
+
+} // namespace
+
+// Copies |bufsize| bytes from |buf| to the destination manager of |cinfo|,
+// asking it for a fresh buffer whenever the current one is full. Raises a
+// JPEGLI_ERROR if the destination manager refuses to provide more space.
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize) {
+  for (size_t pos = 0; pos < bufsize;) {
+    if (cinfo->dest->free_in_buffer == 0 &&
+        !(*cinfo->dest->empty_output_buffer)(cinfo)) {
+      JPEGLI_ERROR("Destination suspension is not supported in markers.");
+    }
+    auto* dest = cinfo->dest;
+    const size_t remaining = bufsize - pos;
+    const size_t len = std::min<size_t>(dest->free_in_buffer, remaining);
+    memcpy(dest->next_output_byte, buf + pos, len);
+    dest->next_output_byte += len;
+    dest->free_in_buffer -= len;
+    pos += len;
+  }
+}
+
+// Convenience overload: writes the whole contents of |bytes|.
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes) {
+  const uint8_t* first = bytes.data();
+  const size_t count = bytes.size();
+  WriteOutput(cinfo, first, count);
+}
+
+// Convenience overload: writes a braced list of byte values.
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes) {
+  const uint8_t* first = bytes.begin();
+  const size_t count = bytes.size();
+  WriteOutput(cinfo, first, count);
+}
+
+// Writes the 18-byte APP0 "JFIF" marker segment: version, density unit and the
+// X/Y densities as big-endian 16-bit values, followed by two zero bytes.
+void EncodeAPP0(j_compress_ptr cinfo) {
+  const uint8_t x_density_hi = static_cast<uint8_t>(cinfo->X_density >> 8);
+  const uint8_t x_density_lo = static_cast<uint8_t>(cinfo->X_density & 0xff);
+  const uint8_t y_density_hi = static_cast<uint8_t>(cinfo->Y_density >> 8);
+  const uint8_t y_density_lo = static_cast<uint8_t>(cinfo->Y_density & 0xff);
+  WriteOutput(cinfo, {0xff, 0xe0, 0, 16, 'J', 'F', 'I', 'F', '\0',
+                      cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+                      cinfo->density_unit, x_density_hi, x_density_lo,
+                      y_density_hi, y_density_lo, 0, 0});
+}
+
+// Writes the 16-byte APP14 "Adobe" marker segment. Its last byte is the color
+// transform flag: 1 for JCS_YCbCr, 2 for JCS_YCCK and 0 for everything else.
+void EncodeAPP14(j_compress_ptr cinfo) {
+  uint8_t color_transform = 0;
+  if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    color_transform = 1;
+  } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+    color_transform = 2;
+  }
+  WriteOutput(cinfo, {0xff, 0xee, 0, 14, 'A', 'd', 'o', 'b', 'e', 0, 100, 0, 0,
+                      0, 0, color_transform});
+}
+
+// Writes the start-of-frame marker segment for |cinfo|.
+//
+// The marker is 0xc2 in progressive mode, otherwise 0xc0 if |is_baseline| is
+// set and 0xc1 if not. The segment holds the sample precision, the image
+// height and width as big-endian 16-bit values and, per component, its id,
+// its packed sampling factors and its quantization table index.
+//
+// Raises a JPEGLI_ERROR if the data precision is not kJpegPrecision, or if a
+// component refers to a quantization table slot that is out of range or not
+// set.
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline) {
+  if (cinfo->data_precision != kJpegPrecision) {
+    is_baseline = false;
+    JPEGLI_ERROR("Unsupported data precision %d", cinfo->data_precision);
+  }
+  const uint8_t marker = cinfo->progressive_mode ? 0xc2
+                         : is_baseline           ? 0xc0
+                                                 : 0xc1;
+  const size_t n_comps = cinfo->num_components;
+  const size_t marker_len = 8 + 3 * n_comps;
+  // Two extra bytes for the marker itself, which the length does not cover.
+  std::vector<uint8_t> data(marker_len + 2);
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = marker;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  data[pos++] = kJpegPrecision;
+  data[pos++] = cinfo->image_height >> 8u;
+  data[pos++] = cinfo->image_height & 0xFFu;
+  data[pos++] = cinfo->image_width >> 8u;
+  data[pos++] = cinfo->image_width & 0xFFu;
+  data[pos++] = n_comps;
+  for (size_t i = 0; i < n_comps; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    data[pos++] = comp->component_id;
+    data[pos++] = ((comp->h_samp_factor << 4u) | (comp->v_samp_factor));
+    const uint32_t quant_idx = comp->quant_tbl_no;
+    // Check the range before indexing: quant_tbl_ptrs only has
+    // NUM_QUANT_TBLS slots. A negative quant_tbl_no wraps to a large unsigned
+    // value and is rejected by the same comparison.
+    if (quant_idx >= static_cast<uint32_t>(NUM_QUANT_TBLS) ||
+        cinfo->quant_tbl_ptrs[quant_idx] == nullptr) {
+      JPEGLI_ERROR("Invalid component quant table index %u.", quant_idx);
+    }
+    data[pos++] = quant_idx;
+  }
+  WriteOutput(cinfo, data);
+}
+
+// Writes the start-of-scan marker segment for scan number |scan_index|: the
+// components of the scan with their DC/AC table selectors, followed by the
+// spectral selection (Ss, Se) and the successive approximation (Ah, Al) bytes.
+void EncodeSOS(j_compress_ptr cinfo, int scan_index) {
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  const ScanCodingInfo& sci = cinfo->master->scan_coding_info[scan_index];
+  const size_t marker_len = 6 + 2 * scan_info->comps_in_scan;
+  std::vector<uint8_t> data;
+  // Two extra bytes for the marker itself, which the length does not cover.
+  data.reserve(marker_len + 2);
+  data.push_back(0xFF);
+  data.push_back(0xDA);
+  data.push_back(marker_len >> 8u);
+  data.push_back(marker_len & 0xFFu);
+  data.push_back(scan_info->comps_in_scan);
+  for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+    const int comp_idx = scan_info->component_index[i];
+    data.push_back(cinfo->comp_info[comp_idx].component_id);
+    // AC table indexes are stored with an offset of 4 in |sci|.
+    data.push_back((sci.dc_tbl_idx[i] << 4u) + (sci.ac_tbl_idx[i] - 4));
+  }
+  data.push_back(scan_info->Ss);
+  data.push_back(scan_info->Se);
+  data.push_back((scan_info->Ah << 4u) | (scan_info->Al));
+  WriteOutput(cinfo, data);
+}
+
+// Builds the encoder-side code table of every entry of |huffman_codes| and
+// writes one DHT marker segment containing all entries whose sent_table flag
+// is not set. Nothing is written if every table was already sent.
+//
+// Slot ids with bit 0x10 set are stored at huff_tables[slot_id - 12], all
+// others at huff_tables[slot_id]. Each table is expected to contain one
+// sentinel code, which is dropped from the written counts and values.
+//
+// Raises a JPEGLI_ERROR if a code table cannot be built, or if a table that
+// has to be written contains no codes at all.
+void EncodeDHT(j_compress_ptr cinfo, const JPEGHuffmanCode* huffman_codes,
+               size_t num_huffman_codes, bool pre_shifted) {
+  if (num_huffman_codes == 0) {
+    return;
+  }
+
+  // Segment length: 2 length bytes plus, per written table, 16 count bytes
+  // and one byte per code. The sentinel code makes up for the slot id byte.
+  size_t marker_len = 2;
+  for (size_t i = 0; i < num_huffman_codes; ++i) {
+    const JPEGHuffmanCode& huff = huffman_codes[i];
+    if (huff.sent_table) continue;
+    marker_len += kJpegHuffmanMaxBitLength;
+    for (size_t j = 0; j <= kJpegHuffmanMaxBitLength; ++j) {
+      marker_len += huff.counts[j];
+    }
+  }
+  std::vector<uint8_t> data(marker_len + 2);
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xC4;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  for (size_t i = 0; i < num_huffman_codes; ++i) {
+    const JPEGHuffmanCode& huff = huffman_codes[i];
+    size_t index = huff.slot_id;
+    HuffmanCodeTable* huff_table;
+    if (index & 0x10) {
+      huff_table = &cinfo->master->huff_tables[index - 12];
+    } else {
+      huff_table = &cinfo->master->huff_tables[index];
+    }
+    // TODO(eustas): cache
+    // TODO(eustas): set up non-existing symbols
+    if (!BuildHuffmanCodeTable(huff, huff_table, pre_shifted)) {
+      JPEGLI_ERROR("Failed to build Huffman code table.");
+    }
+    if (huff.sent_table) continue;
+    size_t total_count = 0;
+    size_t max_length = 0;
+    for (size_t len = 0; len <= kJpegHuffmanMaxBitLength; ++len) {
+      if (huff.counts[len] != 0) {
+        max_length = len;
+      }
+      total_count += huff.counts[len];
+    }
+    // Without this check the decrement below would wrap around for a table
+    // with no codes and the value loop would write far past the end of |data|.
+    if (total_count == 0) {
+      JPEGLI_ERROR("Empty Huffman code table.");
+    }
+    // Drop the sentinel code, which is counted at the longest used length.
+    --total_count;
+    data[pos++] = huff.slot_id;
+    for (size_t len = 1; len <= kJpegHuffmanMaxBitLength; ++len) {
+      data[pos++] =
+          (len == max_length ? huff.counts[len] - 1 : huff.counts[len]);
+    }
+    for (size_t v = 0; v < total_count; ++v) {
+      data[pos++] = huff.values[v];
+    }
+  }
+  if (marker_len > 2) {
+    WriteOutput(cinfo, data);
+  }
+}
+
+// Writes one DQT marker segment with the quantization tables that still have
+// to be sent, and marks them as sent.
+//
+// With |write_all_tables| every table slot that is set is considered,
+// otherwise only the slots referenced by the components. Tables whose
+// sent_table flag is already set are skipped; if nothing is left, no segment
+// is written. |*is_baseline| is cleared if any considered table (sent or not)
+// has a value above 255, in which case that table uses 16-bit entries.
+//
+// Raises a JPEGLI_ERROR if a component refers to a table slot outside of
+// [0, NUM_QUANT_TBLS), if a referenced table is missing, or if a table
+// contains the value 0.
+void EncodeDQT(j_compress_ptr cinfo, bool write_all_tables, bool* is_baseline) {
+  uint8_t data[4 + NUM_QUANT_TBLS * (1 + 2 * DCTSIZE2)];  // 520 bytes
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xDB;
+  pos += 2;  // Length will be filled in later.
+
+  int send_table[NUM_QUANT_TBLS] = {};
+  if (write_all_tables) {
+    for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+      if (cinfo->quant_tbl_ptrs[i]) send_table[i] = 1;
+    }
+  } else {
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      const int tbl_no = cinfo->comp_info[c].quant_tbl_no;
+      // Validate before using the value as an index into the stack array.
+      if (tbl_no < 0 || tbl_no >= NUM_QUANT_TBLS) {
+        JPEGLI_ERROR("Invalid component quant table index %d.", tbl_no);
+      }
+      send_table[tbl_no] = 1;
+    }
+  }
+
+  for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+    if (!send_table[i]) continue;
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[i];
+    if (quant_table == nullptr) {
+      JPEGLI_ERROR("Missing quant table %d", i);
+    }
+    // The precision scan also runs for tables that were already sent, since
+    // it decides whether the image can be baseline.
+    int precision = 0;
+    for (size_t k = 0; k < DCTSIZE2; ++k) {
+      if (quant_table->quantval[k] > 255) {
+        precision = 1;
+        *is_baseline = false;
+      }
+    }
+    if (quant_table->sent_table) {
+      continue;
+    }
+    data[pos++] = (precision << 4) + i;
+    for (size_t j = 0; j < DCTSIZE2; ++j) {
+      int val_idx = kJPEGNaturalOrder[j];
+      int val = quant_table->quantval[val_idx];
+      if (val == 0) {
+        JPEGLI_ERROR("Invalid quantval 0.");
+      }
+      if (precision) {
+        data[pos++] = val >> 8;
+      }
+      data[pos++] = val & 0xFFu;
+    }
+    quant_table->sent_table = TRUE;
+  }
+  if (pos > 4) {
+    // The length field covers everything after the two marker bytes.
+    data[2] = (pos - 2) >> 8u;
+    data[3] = (pos - 2) & 0xFFu;
+    WriteOutput(cinfo, data, pos);
+  }
+}
+
+// Writes the DRI marker segment carrying cinfo->restart_interval as a
+// big-endian 16-bit value. Always returns true.
+bool EncodeDRI(j_compress_ptr cinfo) {
+  const uint8_t interval_hi =
+      static_cast<uint8_t>(cinfo->restart_interval >> 8);
+  const uint8_t interval_lo =
+      static_cast<uint8_t>(cinfo->restart_interval & 0xFF);
+  WriteOutput(cinfo, {0xFF, 0xDD, 0, 4, interval_hi, interval_lo});
+  return true;
+}
+
+// Appends the two bytes 0xFF, |marker| at the current position of the bit
+// writer's byte buffer and advances the position by two.
+static JXL_INLINE void EmitMarker(JpegBitWriter* bw, int marker) {
+  bw->data[bw->pos] = 0xFF;
+  bw->data[bw->pos + 1] = marker;
+  bw->pos += 2;
+}
+
+// Reports encoding progress to the progress monitor of |cinfo|, if one is
+// installed: pass 1 + |scan_index|, row |mcu_y| out of total_iMCU_rows.
+void ProgressMonitorEncodePass(j_compress_ptr cinfo, size_t scan_index,
+                               size_t mcu_y) {
+  auto* progress = cinfo->progress;
+  if (progress == nullptr) {
+    return;
+  }
+  progress->completed_passes = 1 + scan_index;
+  progress->pass_counter = mcu_y;
+  progress->pass_limit = cinfo->total_iMCU_rows;
+  (*progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+// Entropy-codes scan number |scan_index| from the stored coefficient buffers
+// into the bit writer, emitting restart markers every cinfo->restart_interval
+// MCUs when that interval is positive. Returns false if a block could not be
+// encoded or if the bit writer is not healthy at the end of the scan.
+bool EncodeScan(j_compress_ptr cinfo, int scan_index) {
+  jpeg_comp_master* m = cinfo->master;
+  JpegBitWriter* bw = &m->bw;
+  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+  const ScanCodingInfo& sci = m->scan_coding_info[scan_index];
+
+  const int restart_interval = cinfo->restart_interval;
+  int restarts_to_go = restart_interval;
+  int next_restart_marker = 0;
+
+  coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+  DCTCodingState coding_state;
+  DCTCodingStateInit(&coding_state);
+
+  // "Non-interleaved" means color data comes in separate scans, in other words
+  // each scan can contain only one color component.
+  const bool is_interleaved = (scan_info->comps_in_scan > 1);
+  jpeg_component_info* base_comp =
+      &cinfo->comp_info[scan_info->component_index[0]];
+  // h_group / v_group act as numerators for converting number of blocks to
+  // number of MCU. In interleaved mode it is 1, so MCU is represented with
+  // max_*_samp_factor blocks. In non-interleaved mode we choose numerator to
+  // be the sampling factor, consequently MCU is always represented with single
+  // block.
+  const int h_group = is_interleaved ? 1 : base_comp->h_samp_factor;
+  const int v_group = is_interleaved ? 1 : base_comp->v_samp_factor;
+  const int MCUs_per_row =
+      DivCeil(cinfo->image_width * h_group, 8 * cinfo->max_h_samp_factor);
+  const int MCU_rows =
+      DivCeil(cinfo->image_height * v_group, 8 * cinfo->max_v_samp_factor);
+
+  const bool is_progressive = cinfo->progressive_mode;
+  const int Al = scan_info->Al;
+  const int Ah = scan_info->Ah;
+  const int Ss = scan_info->Ss;
+  const int Se = scan_info->Se;
+  // All-zero block that stands in for blocks outside of a component.
+  HWY_ALIGN constexpr coeff_t kDummyBlock[DCTSIZE2] = {0};
+
+  JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+  for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) {
+    ProgressMonitorEncodePass(cinfo, scan_index, mcu_y);
+
+    // Get access to the block rows of this MCU row for every scan component.
+    for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+      const int comp_idx = scan_info->component_index[i];
+      jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+      const int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+      const int by0 = mcu_y * n_blocks_y;
+      const int block_rows_left = comp->height_in_blocks - by0;
+      const int max_block_rows = std::min(n_blocks_y, block_rows_left);
+      ba[i] = (*cinfo->mem->access_virt_barray)(
+          reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
+          by0, max_block_rows, false);
+    }
+
+    for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+      // Possibly emit a restart marker.
+      if (restart_interval > 0 && restarts_to_go == 0) {
+        Flush(&coding_state, bw);
+        JumpToByteBoundary(bw);
+        EmitMarker(bw, 0xD0 + next_restart_marker);
+        // Restart markers cycle through 0xD0 .. 0xD7.
+        next_restart_marker = (next_restart_marker + 1) & 0x7;
+        restarts_to_go = restart_interval;
+        // DC prediction starts over after a restart marker.
+        memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
+      }
+      // Encode one MCU
+      for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+        const int comp_idx = scan_info->component_index[i];
+        jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+        HuffmanCodeTable* dc_huff = &m->huff_tables[sci.dc_tbl_idx[i]];
+        HuffmanCodeTable* ac_huff = &m->huff_tables[sci.ac_tbl_idx[i]];
+        const int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+        const int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
+        for (int iy = 0; iy < n_blocks_y; ++iy) {
+          const size_t block_y = mcu_y * n_blocks_y + iy;
+          for (int ix = 0; ix < n_blocks_x; ++ix) {
+            const size_t block_x = mcu_x * n_blocks_x + ix;
+            const bool in_component = block_x < comp->width_in_blocks &&
+                                      block_y < comp->height_in_blocks;
+            const coeff_t* block =
+                in_component ? &ba[i][iy][block_x][0] : kDummyBlock;
+            bool ok;
+            if (is_progressive && Ah != 0) {
+              ok = EncodeRefinementBits(block, ac_huff, Ss, Se, Al,
+                                        &coding_state, bw);
+            } else if (is_progressive) {
+              ok = EncodeDCTBlockProgressive(block, dc_huff, ac_huff, Ss, Se,
+                                             Al, &coding_state,
+                                             last_dc_coeff + i, bw);
+            } else {
+              ok = EncodeDCTBlockSequential(block, dc_huff, ac_huff,
+                                            last_dc_coeff + i, bw);
+            }
+            if (!ok) return false;
+          }
+        }
+      }
+      --restarts_to_go;
+    }
+    if (!EmptyBitWriterBuffer(bw)) {
+      JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+    }
+  }
+
+  // Write whatever is still buffered and pad to a whole byte.
+  Flush(&coding_state, bw);
+  JumpToByteBoundary(bw);
+  if (!EmptyBitWriterBuffer(bw)) {
+    JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+  }
+  if (!bw->healthy) {
+    return false;
+  }
+  return true;
+}
+
+// One entropy-coded symbol together with its extra bits.
+struct Token {
+  // Index of the histogram this token is counted in.
+  uint8_t histo_idx;
+  // Symbol value; its low nibble is the number of extra bits.
+  uint8_t symbol;
+  // Extra bits that follow the symbol.
+  uint16_t bits;
+
+  // The arguments are narrowed to the width of the respective member.
+  Token(int i, int s, int b)
+      : histo_idx(static_cast<uint8_t>(i)),
+        symbol(static_cast<uint8_t>(s)),
+        bits(static_cast<uint16_t>(b)) {}
+};
+
+// Appends the tokens of one block to the array at |*tokens_ptr| and advances
+// the pointer past them. The DC token (difference to |*last_dc_coeff|, which
+// is updated) uses histogram |histo_dc|; the AC tokens, formed from the
+// coefficients in kJPEGNaturalOrder, use histogram |histo_ac|.
+void ComputeTokensForBlock(const coeff_t* block, int histo_dc, int histo_ac,
+                           coeff_t* last_dc_coeff, Token** tokens_ptr) {
+  Token* out = *tokens_ptr;
+
+  const coeff_t dc = block[0];
+  const coeff_t dc_diff = dc - *last_dc_coeff;
+  if (dc_diff == 0) {
+    *out++ = Token(histo_dc, 0, 0);
+  } else {
+    *last_dc_coeff = dc;
+    coeff_t magnitude = dc_diff;
+    coeff_t extra = dc_diff;
+    if (dc_diff < 0) {
+      magnitude = -magnitude;
+      // Negative values are coded as (value - 1) in the low bits.
+      extra--;
+    }
+    const int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(magnitude) + 1;
+    const int dc_mask = (1 << dc_nbits) - 1;
+    *out++ = Token(histo_dc, dc_nbits, extra & dc_mask);
+  }
+
+  int num_nonzeros = HWY_DYNAMIC_DISPATCH(NumNonZero8x8ExceptDC)(block);
+  for (int k = 1; k < 64; ++k) {
+    if (num_nonzeros == 0) {
+      // Only zeros are left: symbol 0 ends the block.
+      *out++ = Token(histo_ac, 0, 0);
+      break;
+    }
+    // Skip to the next non-zero coefficient; one exists because
+    // num_nonzeros > 0.
+    int run = 0;
+    coeff_t coef = block[kJPEGNaturalOrder[k]];
+    while (coef == 0) {
+      ++run;
+      ++k;
+      coef = block[kJPEGNaturalOrder[k]];
+    }
+    --num_nonzeros;
+    const coeff_t magnitude = (coef < 0) ? -coef : coef;
+    const coeff_t extra = (coef < 0) ? ~magnitude : magnitude;
+    // Runs of 16 or more zeros are split off as 0xf0 tokens.
+    for (; run > 15; run -= 16) {
+      *out++ = Token(histo_ac, 0xf0, 0);
+    }
+    const int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(magnitude) + 1;
+    const int ac_mask = (1 << ac_nbits) - 1;
+    const int symbol = (run << 4u) + ac_nbits;
+    *out++ = Token(histo_ac, symbol, extra & ac_mask);
+  }
+  *tokens_ptr = out;
+}
+
+// A token buffer together with the number of tokens stored in it.
+struct TokenArray {
+  // Start of the buffer; null while no buffer has been assigned.
+  Token* tokens{nullptr};
+  // Number of tokens in |tokens|.
+  size_t num_tokens{0};
+};
+
+// Upper bound used for the number of tokens of one MCU row: kDCTBlockSize
+// tokens for every block of every MCU in the row.
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {
+  int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+  size_t blocks_per_mcu = 0;
+  const jpeg_component_info* comp = cinfo->comp_info;
+  const jpeg_component_info* const comp_end = comp + cinfo->num_components;
+  for (; comp != comp_end; ++comp) {
+    blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor;
+  }
+  return kDCTBlockSize * blocks_per_mcu * MCUs_per_row;
+}
+
+// Returns the size, in tokens, of the next token buffer to allocate when
+// |num_tokens| tokens were produced by the first |mcu_y| of |ysize_mcus| MCU
+// rows.
+//
+// The total is extrapolated from the rows seen so far with a 4/3 safety
+// factor (16 * max_per_row before the first row). The result is the part of
+// that total not produced yet, but at least |max_per_row| and at most
+// |max_per_row| for each remaining row. |cinfo| is not used.
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+                         size_t num_tokens, size_t max_per_row) {
+  const size_t estimate =
+      (mcu_y == 0) ? 16 * max_per_row
+                   : (4 * ysize_mcus * num_tokens) / (3 * mcu_y);
+  const size_t at_least_one_row = std::max(max_per_row, estimate - num_tokens);
+  const size_t all_remaining_rows = (ysize_mcus - mcu_y) * max_per_row;
+  return std::min(all_remaining_rows, at_least_one_row);
+}
+
+// Converts the stored coefficients of all components into tokens, MCU row by
+// MCU row with the components interleaved, and appends the filled token
+// buffers to |token_arrays|. Component c uses histogram c for its DC tokens
+// and histogram c + 4 for its AC tokens.
+//
+// Buffers are allocated from the JPOOL_IMAGE pool. A new one is started
+// whenever the current one may not have room for another full MCU row.
+void ComputeTokens(j_compress_ptr cinfo,
+                   std::vector<TokenArray>* token_arrays) {
+  jpeg_comp_master* m = cinfo->master;
+  const size_t max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+  const int xsize_mcus =
+      DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+  const int ysize_mcus =
+      DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor);
+  // Without subsampling every MCU has exactly one block per component and no
+  // block can fall outside of a component.
+  const bool no_subsampling =
+      cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1;
+
+  TokenArray ta;
+  Token* next_token = ta.tokens;
+  size_t capacity = 0;
+  size_t total_num_tokens = 0;
+  coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+  JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+  for (int mcu_y = 0; mcu_y < ysize_mcus; ++mcu_y) {
+    ProgressMonitorEncodePass(cinfo, 0, mcu_y);
+
+    // Start a new buffer if the current one could overflow in this row.
+    ta.num_tokens = next_token - ta.tokens;
+    if (ta.num_tokens + max_tokens_per_mcu_row > capacity) {
+      if (ta.tokens) {
+        token_arrays->push_back(ta);
+        total_num_tokens += ta.num_tokens;
+      }
+      capacity = EstimateNumTokens(cinfo, mcu_y, ysize_mcus, total_num_tokens,
+                                   max_tokens_per_mcu_row);
+      ta.tokens = Allocate<Token>(cinfo, capacity, JPOOL_IMAGE);
+      next_token = ta.tokens;
+    }
+
+    // Get access to the block rows of this MCU row for every component.
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      jpeg_component_info* comp = &cinfo->comp_info[c];
+      const int by0 = mcu_y * comp->v_samp_factor;
+      const int block_rows_left = comp->height_in_blocks - by0;
+      const int max_block_rows =
+          std::min(comp->v_samp_factor, block_rows_left);
+      ba[c] = (*cinfo->mem->access_virt_barray)(
+          reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
+          max_block_rows, false);
+    }
+
+    if (no_subsampling) {
+      for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+        for (int c = 0; c < cinfo->num_components; ++c) {
+          ComputeTokensForBlock(&ba[c][0][mcu_x][0], c, c + 4,
+                                &last_dc_coeff[c], &next_token);
+        }
+      }
+    } else {
+      for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+        for (int c = 0; c < cinfo->num_components; ++c) {
+          jpeg_component_info* comp = &cinfo->comp_info[c];
+          for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
+            const size_t block_y = mcu_y * comp->v_samp_factor + iy;
+            for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
+              const size_t block_x = mcu_x * comp->h_samp_factor + ix;
+              const bool in_component = block_x < comp->width_in_blocks &&
+                                        block_y < comp->height_in_blocks;
+              if (in_component) {
+                ComputeTokensForBlock(&ba[c][iy][block_x][0], c, c + 4,
+                                      &last_dc_coeff[c], &next_token);
+              } else {
+                // Block outside of the component: symbol 0 for DC and AC.
+                *next_token++ = Token(c, 0, 0);
+                *next_token++ = Token(c + 4, 0, 0);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  ta.num_tokens = next_token - ta.tokens;
+  token_arrays->push_back(ta);
+}
+
+// Writes num_tokens tokens, starting at tokens, to the bit writer bw.
+//
+// For each token the Huffman table is huff_tables[context_map[histo_idx]].
+// The symbol is written with WriteSymbol and, when the low four bits of the
+// symbol are non-zero, that many bits of the token's bits field are written
+// with WriteBits. After every bw->len / 8 tokens the writer buffer is emptied
+// with EmptyBitWriterBuffer; a failure there is reported through
+// JPEGLI_ERROR.
+// NOTE(review): the flush countdown restarts on every call and the buffer is
+// not emptied on entry - confirm the buffer has enough headroom for data left
+// over from a previous call.
+void WriteTokens(j_compress_ptr cinfo, const Token* tokens, size_t num_tokens,
+                 const HuffmanCodeTable* huff_tables, const int* context_map,
+                 JpegBitWriter* bw) {
+  const size_t tokens_per_flush = bw->len / 8;
+  size_t tokens_until_flush = tokens_per_flush;
+  const Token* const end = tokens + num_tokens;
+  for (const Token* cur = tokens; cur != end; ++cur) {
+    const Token token = *cur;
+    const HuffmanCodeTable* table = &huff_tables[context_map[token.histo_idx]];
+    WriteSymbol(token.symbol, table, bw);
+    const int extra_bits = token.symbol & 0xf;
+    if (extra_bits > 0) {
+      WriteBits(bw, extra_bits, token.bits);
+    }
+    --tokens_until_flush;
+    if (tokens_until_flush != 0) {
+      continue;
+    }
+    // Countdown expired: drain the buffer and start the next cycle.
+    if (!EmptyBitWriterBuffer(bw)) {
+      JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+    }
+    tokens_until_flush = tokens_per_flush;
+  }
+}
+
+// Accumulates symbol counts for num_tokens tokens starting at tokens: each
+// token increments count[symbol] in the histogram selected by its histo_idx.
+// Existing counts in histograms are kept and added to.
+void BuildHistograms(const Token* tokens, size_t num_tokens,
+                     Histogram* histograms) {
+  const Token* const end = tokens + num_tokens;
+  for (const Token* cur = tokens; cur != end; ++cur) {
+    Histogram& target = histograms[cur->histo_idx];
+    ++target.count[cur->symbol];
+  }
+}
+
+// Encodes the image as a single scan.
+//
+// Steps, in order: tokenize all coefficients, count symbols per histogram,
+// cluster the first four histograms and the last four histograms separately,
+// turn the clusters into Huffman codes, record the per-component table
+// indexes in cinfo->master->scan_coding_info, write the DQT, SOF, DHT and SOS
+// markers, and finally write all tokens through the bit writer. Reports a
+// JPEGLI_ERROR if the output buffer cannot be emptied or if the bit writer is
+// not healthy at the end.
+void EncodeSingleScan(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+
+  // Tokenize first; the symbol statistics are needed to build the tables.
+  std::vector<TokenArray> token_arrays;
+  ComputeTokens(cinfo, &token_arrays);
+
+  // Slots 0..3 and 4..7 are clustered independently below.
+  Histogram histograms[8] = {};
+  for (const TokenArray& chunk : token_arrays) {
+    BuildHistograms(chunk.tokens, chunk.num_tokens, histograms);
+  }
+  JpegClusteredHistograms dc_clusters;
+  ClusterJpegHistograms(&histograms[0], 4, &dc_clusters);
+  JpegClusteredHistograms ac_clusters;
+  ClusterJpegHistograms(&histograms[4], 4, &ac_clusters);
+
+  // One Huffman code per cluster; the second group gets slot ids offset by
+  // 0x10.
+  JPEGHuffmanCode* huffman_codes =
+      Allocate<JPEGHuffmanCode>(cinfo, 8, JPOOL_IMAGE);
+  size_t num_huffman_codes = 0;
+  for (size_t dc = 0; dc < dc_clusters.histograms.size(); ++dc) {
+    AddJpegHuffmanCode(dc_clusters.histograms[dc], dc, huffman_codes,
+                       &num_huffman_codes);
+  }
+  for (size_t ac = 0; ac < ac_clusters.histograms.size(); ++ac) {
+    AddJpegHuffmanCode(ac_clusters.histograms[ac], 0x10 + ac, huffman_codes,
+                       &num_huffman_codes);
+  }
+
+  // Map every component to its tables. Any cluster index above 1 clears the
+  // baseline flag.
+  int context_map[8];
+  ScanCodingInfo sci = {};
+  bool is_baseline = true;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    if (!(dc_clusters.histogram_indexes[c] <= 1 &&
+          ac_clusters.histogram_indexes[c] <= 1)) {
+      is_baseline = false;
+    }
+    sci.dc_tbl_idx[c] = dc_clusters.histogram_indexes[c];
+    context_map[c] = sci.dc_tbl_idx[c];
+    sci.ac_tbl_idx[c] = ac_clusters.histogram_indexes[c] + 4;
+    context_map[c + 4] = sci.ac_tbl_idx[c];
+  }
+  sci.num_huffman_codes = num_huffman_codes;
+  memcpy(m->scan_coding_info, &sci, sizeof(sci));
+
+  // EncodeDQT receives a pointer to is_baseline, so it may update the flag
+  // before EncodeSOF reads it.
+  EncodeDQT(cinfo, /*write_all_tables=*/false, &is_baseline);
+  EncodeSOF(cinfo, is_baseline);
+  EncodeDHT(cinfo, huffman_codes, num_huffman_codes);
+  EncodeSOS(cinfo, 0);
+
+  // Entropy-coded data: write every token buffer in order.
+  JpegBitWriter* bw = &m->bw;
+  HuffmanCodeTable* huff_tables = m->huff_tables;
+  for (const TokenArray& chunk : token_arrays) {
+    WriteTokens(cinfo, chunk.tokens, chunk.num_tokens, huff_tables,
+                context_map, bw);
+  }
+  JumpToByteBoundary(bw);
+  if (!EmptyBitWriterBuffer(bw)) {
+    JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+  }
+  if (!bw->healthy) {
+    JPEGLI_ERROR("Failed to encode scan.");
+  }
+}
+
+// Registers the per-target WriteiMCURow implementations so that
+// HWY_DYNAMIC_DISPATCH can select one of them.
+HWY_EXPORT(WriteiMCURow);
+// Forwards to the WriteiMCURow implementation selected by
+// HWY_DYNAMIC_DISPATCH, passing cinfo through unchanged.
+void WriteiMCURow(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo);
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE