From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- third_party/jpeg-xl/lib/jpegli/entropy_coding.cc | 605 +++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 third_party/jpeg-xl/lib/jpegli/entropy_coding.cc (limited to 'third_party/jpeg-xl/lib/jpegli/entropy_coding.cc') diff --git a/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc b/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc new file mode 100644 index 0000000000..110a36a3e9 --- /dev/null +++ b/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc @@ -0,0 +1,605 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/entropy_coding.h" + +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/huffman.h" +#include "lib/jxl/base/bits.h" + +namespace jpegli { +namespace { + +float HistogramCost(const Histogram& histo) { + std::vector counts(kJpegHuffmanAlphabetSize + 1); + std::vector depths(kJpegHuffmanAlphabetSize + 1); + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + counts[i] = histo.count[i]; + } + counts[kJpegHuffmanAlphabetSize] = 1; + CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength, + &depths[0]); + size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8; + size_t data_bits = 0; + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + header_bits += 8; + data_bits += counts[i] * depths[i]; + } + } + return header_bits + data_bits; +} + +void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) { + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + c->count[i] = a.count[i] + b.count[i]; + } +} + +bool IsEmptyHistogram(const Histogram& histo) { + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (histo.count[i]) return false; + } + return true; +} + +} // namespace + +void ClusterJpegHistograms(const Histogram* histograms, size_t num, + JpegClusteredHistograms* clusters) { + clusters->histogram_indexes.resize(num); + std::vector slot_histograms; + std::vector slot_costs; + for (size_t i = 0; i < num; ++i) { + const Histogram& cur = histograms[i]; + if (IsEmptyHistogram(cur)) { + continue; + } + float best_cost = HistogramCost(cur); + size_t best_slot = slot_histograms.size(); + for (size_t j = 0; j < slot_histograms.size(); ++j) { + size_t prev_idx = slot_histograms[j]; + const Histogram& prev = clusters->histograms[prev_idx]; + Histogram combined; + AddHistograms(prev, cur, &combined); + float combined_cost = HistogramCost(combined); + float cost = combined_cost - slot_costs[j]; + if (cost < best_cost) { + best_cost = cost; + best_slot = j; + } + } + if (best_slot == slot_histograms.size()) { + // Create new histogram. + size_t histogram_index = clusters->histograms.size(); + clusters->histograms.push_back(cur); + clusters->histogram_indexes[i] = histogram_index; + if (best_slot < 4) { + // We have a free slot, so we put the new histogram there. + slot_histograms.push_back(histogram_index); + slot_costs.push_back(best_cost); + } else { + // TODO(szabadka) Find the best histogram to replce. + best_slot = (clusters->slot_ids.back() + 1) % 4; + } + slot_histograms[best_slot] = histogram_index; + slot_costs[best_slot] = best_cost; + clusters->slot_ids.push_back(best_slot); + } else { + // Merge this histogram with a previous one. + size_t histogram_index = slot_histograms[best_slot]; + const Histogram& prev = clusters->histograms[histogram_index]; + AddHistograms(prev, cur, &clusters->histograms[histogram_index]); + clusters->histogram_indexes[i] = histogram_index; + JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot); + slot_costs[best_slot] += best_cost; + } + } +} + +void BuildJpegHuffmanCode(const Histogram& histo, JPEGHuffmanCode* huff) { + std::vector counts(kJpegHuffmanAlphabetSize + 1); + std::vector depths(kJpegHuffmanAlphabetSize + 1); + for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) { + counts[j] = histo.count[j]; + } + counts[kJpegHuffmanAlphabetSize] = 1; + CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength, + &depths[0]); + std::fill(std::begin(huff->counts), std::end(huff->counts), 0); + std::fill(std::begin(huff->values), std::end(huff->values), 0); + for (size_t i = 0; i <= kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + ++huff->counts[depths[i]]; + } + } + int offset[kJpegHuffmanMaxBitLength + 1] = {0}; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + offset[i] = offset[i - 1] + huff->counts[i - 1]; + } + for (size_t i = 0; i <= kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + huff->values[offset[depths[i]]++] = i; + } + } +} + +void AddJpegHuffmanCode(const Histogram& histogram, size_t slot_id, + JPEGHuffmanCode* huff_codes, size_t* num_huff_codes) { + JPEGHuffmanCode huff_code = {}; + huff_code.slot_id = slot_id; + BuildJpegHuffmanCode(histogram, &huff_code); + memcpy(&huff_codes[*num_huff_codes], &huff_code, sizeof(huff_code)); + ++(*num_huff_codes); +} + +namespace { +void SetJpegHuffmanCode(const JpegClusteredHistograms& clusters, + size_t histogram_id, size_t slot_id_offset, + std::vector& slot_histograms, + uint32_t* slot_id, bool* is_baseline, + JPEGHuffmanCode* huff_codes, size_t* num_huff_codes) { + JXL_ASSERT(histogram_id < clusters.histogram_indexes.size()); + uint32_t histogram_index = clusters.histogram_indexes[histogram_id]; + uint32_t id = clusters.slot_ids[histogram_index]; + if (id > 1) { + *is_baseline = false; + } + *slot_id = id + (slot_id_offset / 4); + if (slot_histograms[id] != histogram_index) { + AddJpegHuffmanCode(clusters.histograms[histogram_index], + slot_id_offset + id, huff_codes, num_huff_codes); + slot_histograms[id] = histogram_index; + } +} + +struct DCTState { + int eob_run = 0; + size_t num_refinement_bits = 0; + Histogram* ac_histo = nullptr; +}; + +static JXL_INLINE void ProcessFlush(DCTState* s) { + if (s->eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero(s->eob_run); + int symbol = nbits << 4u; + ++s->ac_histo->count[symbol]; + s->eob_run = 0; + } + s->num_refinement_bits = 0; +} + +static JXL_INLINE void ProcessEndOfBand(DCTState* s, size_t new_refinement_bits, + Histogram* new_ac_histo) { + if (s->eob_run == 0) { + s->ac_histo = new_ac_histo; + } + ++s->eob_run; + s->num_refinement_bits += new_refinement_bits; + if (s->eob_run == 0x7FFF || + s->num_refinement_bits > kJPEGMaxCorrectionBits - kDCTBlockSize + 1) { + ProcessFlush(s); + } +} + +bool ProcessDCTBlockSequential(const coeff_t* coeffs, Histogram* dc_histo, + Histogram* ac_histo, coeff_t* last_dc_coeff) { + coeff_t temp2; + coeff_t temp; + temp2 = coeffs[0]; + temp = temp2 - *last_dc_coeff; + *last_dc_coeff = temp2; + temp2 = temp; + if (temp < 0) { + temp = -temp; + if (temp < 0) return false; + temp2--; + } + int dc_nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero(temp) + 1); + ++dc_histo->count[dc_nbits]; + if (dc_nbits >= 12) return false; + int r = 0; + for (int k = 1; k < 64; ++k) { + if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) { + r++; + continue; + } + if (temp < 0) { + temp = -temp; + if (temp < 0) return false; + temp2 = ~temp; + } else { + temp2 = temp; + } + while (r > 15) { + ++ac_histo->count[0xf0]; + r -= 16; + } + int ac_nbits = jxl::FloorLog2Nonzero(temp) + 1; + if (ac_nbits >= 16) return false; + int symbol = (r << 4u) + ac_nbits; + ++ac_histo->count[symbol]; + r = 0; + } + if (r > 0) { + ++ac_histo->count[0]; + } + return true; +} + +bool ProcessDCTBlockProgressive(const coeff_t* coeffs, Histogram* dc_histo, + Histogram* ac_histo, int Ss, int Se, int Al, + DCTState* s, coeff_t* last_dc_coeff) { + bool eob_run_allowed = Ss > 0; + coeff_t temp2; + coeff_t temp; + if (Ss == 0) { + temp2 = coeffs[0] >> Al; + temp = temp2 - *last_dc_coeff; + *last_dc_coeff = temp2; + temp2 = temp; + if (temp < 0) { + temp = -temp; + if (temp < 0) return false; + temp2--; + } + int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero(temp) + 1); + ++dc_histo->count[nbits]; + ++Ss; + } + if (Ss > Se) { + return true; + } + int r = 0; + for (int k = Ss; k <= Se; ++k) { + if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) { + r++; + continue; + } + if (temp < 0) { + temp = -temp; + if (temp < 0) return false; + temp >>= Al; + temp2 = ~temp; + } else { + temp >>= Al; + temp2 = temp; + } + if (temp == 0) { + r++; + continue; + } + ProcessFlush(s); + while (r > 15) { + ++ac_histo->count[0xf0]; + r -= 16; + } + int nbits = jxl::FloorLog2Nonzero(temp) + 1; + int symbol = (r << 4u) + nbits; + ++ac_histo->count[symbol]; + r = 0; + } + if (r > 0) { + ProcessEndOfBand(s, 0, ac_histo); + if (!eob_run_allowed) { + ProcessFlush(s); + } + } + return true; +} + +bool ProcessRefinementBits(const coeff_t* coeffs, Histogram* ac_histo, int Ss, + int Se, int Al, DCTState* s) { + bool eob_run_allowed = Ss > 0; + if (Ss == 0) { + ++Ss; + } + if (Ss > Se) { + return true; + } + int abs_values[kDCTBlockSize]; + int eob = 0; + for (int k = Ss; k <= Se; k++) { + const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]); + abs_values[k] = abs_val >> Al; + if (abs_values[k] == 1) { + eob = k; + } + } + int r = 0; + size_t num_refinement_bits = 0; + for (int k = Ss; k <= Se; k++) { + if (abs_values[k] == 0) { + r++; + continue; + } + while (r > 15 && k <= eob) { + ProcessFlush(s); + ++ac_histo->count[0xf0]; + r -= 16; + num_refinement_bits = 0; + } + if (abs_values[k] > 1) { + ++num_refinement_bits; + continue; + } + ProcessFlush(s); + int symbol = (r << 4u) + 1; + ++ac_histo->count[symbol]; + num_refinement_bits = 0; + r = 0; + } + if (r > 0 || num_refinement_bits > 0) { + ProcessEndOfBand(s, num_refinement_bits, ac_histo); + if (!eob_run_allowed) { + ProcessFlush(s); + } + } + return true; +} + +void ProgressMonitorHistogramPass(j_compress_ptr cinfo, size_t scan_index, + size_t mcu_y) { + if (cinfo->progress == nullptr) { + return; + } + cinfo->progress->completed_passes = 1 + scan_index; + cinfo->progress->pass_counter = mcu_y; + cinfo->progress->pass_limit = cinfo->total_iMCU_rows; + (*cinfo->progress->progress_monitor)(reinterpret_cast(cinfo)); +} + +bool ProcessScan(j_compress_ptr cinfo, + size_t scan_index, int* histo_index, Histogram* dc_histograms, + Histogram* ac_histograms) { + jpeg_comp_master* m = cinfo->master; + size_t restart_interval = RestartIntervalForScan(cinfo, scan_index); + int restarts_to_go = restart_interval; + coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0}; + DCTState s; + + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + // "Non-interleaved" means color data comes in separate scans, in other words + // each scan can contain only one color component. + const bool is_interleaved = (scan_info->comps_in_scan > 1); + jpeg_component_info* base_comp = + &cinfo->comp_info[scan_info->component_index[0]]; + // h_group / v_group act as numerators for converting number of blocks to + // number of MCU. In interleaved mode it is 1, so MCU is represented with + // max_*_samp_factor blocks. In non-interleaved mode we choose numerator to + // be the samping factor, consequently MCU is always represented with single + // block. + const int h_group = is_interleaved ? 1 : base_comp->h_samp_factor; + const int v_group = is_interleaved ? 1 : base_comp->v_samp_factor; + int MCUs_per_row = + DivCeil(cinfo->image_width * h_group, 8 * cinfo->max_h_samp_factor); + int MCU_rows = + DivCeil(cinfo->image_height * v_group, 8 * cinfo->max_v_samp_factor); + const bool is_progressive = cinfo->progressive_mode; + const int Al = scan_info->Al; + const int Ah = scan_info->Ah; + const int Ss = scan_info->Ss; + const int Se = scan_info->Se; + constexpr coeff_t kDummyBlock[DCTSIZE2] = {0}; + + JBLOCKARRAY ba[MAX_COMPS_IN_SCAN]; + for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) { + ProgressMonitorHistogramPass(cinfo, scan_index, mcu_y); + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1; + int by0 = mcu_y * n_blocks_y; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(n_blocks_y, block_rows_left); + ba[i] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coeff_buffers[comp_idx], + by0, max_block_rows, false); + } + for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) { + // Possibly emit a restart marker. + if (restart_interval > 0 && restarts_to_go == 0) { + ProcessFlush(&s); + restarts_to_go = restart_interval; + memset(last_dc_coeff, 0, sizeof(last_dc_coeff)); + } + // Encode one MCU + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + int histo_idx = *histo_index + i; + Histogram* dc_histo = &dc_histograms[histo_idx]; + Histogram* ac_histo = &ac_histograms[histo_idx]; + int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1; + int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1; + for (int iy = 0; iy < n_blocks_y; ++iy) { + for (int ix = 0; ix < n_blocks_x; ++ix) { + size_t block_y = mcu_y * n_blocks_y + iy; + size_t block_x = mcu_x * n_blocks_x + ix; + const coeff_t* block; + if (block_x >= comp->width_in_blocks || + block_y >= comp->height_in_blocks) { + block = kDummyBlock; + } else { + block = &ba[i][iy][block_x][0]; + } + bool ok; + if (!is_progressive) { + ok = ProcessDCTBlockSequential(block, dc_histo, ac_histo, + last_dc_coeff + i); + } else if (Ah == 0) { + ok = ProcessDCTBlockProgressive(block, dc_histo, ac_histo, Ss, Se, + Al, &s, last_dc_coeff + i); + } else { + ok = ProcessRefinementBits(block, ac_histo, Ss, Se, Al, &s); + } + if (!ok) return false; + } + } + } + --restarts_to_go; + } + } + ProcessFlush(&s); + *histo_index += scan_info->comps_in_scan; + return true; +} + +void ProcessJpeg(j_compress_ptr cinfo, + std::vector* dc_histograms, + std::vector* ac_histograms) { + int histo_index = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + if (!ProcessScan(cinfo, i, &histo_index, &(*dc_histograms)[0], + &(*ac_histograms)[0])) { + JPEGLI_ERROR("Invalid scan."); + } + } +} + +void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc, + JPEGHuffmanCode* huffman_codes, + size_t* num_huffman_codes) { + const char* type = is_dc ? "DC" : "AC"; + if (index < 0 || index >= NUM_HUFF_TBLS) { + JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index); + } + JHUFF_TBL* table = + is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index]; + if (table == nullptr) { + JPEGLI_ERROR("Missing %s Huffman table %d", type, index); + } + ValidateHuffmanTable(reinterpret_cast(cinfo), table, is_dc); + JPEGHuffmanCode huff = {}; + size_t max_depth = 0; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + if (table->bits[i] != 0) max_depth = i; + huff.counts[i] = table->bits[i]; + } + ++huff.counts[max_depth]; + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + huff.values[i] = table->huffval[i]; + } + huff.slot_id = index + (is_dc ? 0 : 0x10); + huff.sent_table = table->sent_table; + bool have_slot = false; + for (size_t i = 0; i < *num_huffman_codes; ++i) { + if (huffman_codes[i].slot_id == huff.slot_id) have_slot = true; + } + if (!have_slot) { + memcpy(&huffman_codes[*num_huffman_codes], &huff, sizeof(huff)); + ++(*num_huffman_codes); + } +} + +} // namespace + +void CopyHuffmanCodes(j_compress_ptr cinfo, bool* is_baseline) { + jpeg_comp_master* m = cinfo->master; + m->huffman_codes = + Allocate(cinfo, 2 * cinfo->num_components, JPOOL_IMAGE); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (comp->dc_tbl_no > 1 || comp->ac_tbl_no > 1) { + *is_baseline = false; + } + CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, m->huffman_codes, + &m->num_huffman_codes); + CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, m->huffman_codes, + &m->num_huffman_codes); + } + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* si = &cinfo->scan_info[i]; + ScanCodingInfo sci = {}; + for (int j = 0; j < si->comps_in_scan; ++j) { + int ci = si->component_index[j]; + sci.dc_tbl_idx[j] = cinfo->comp_info[ci].dc_tbl_no; + sci.ac_tbl_idx[j] = cinfo->comp_info[ci].ac_tbl_no + 4; + } + if (i == 0) { + sci.num_huffman_codes = m->num_huffman_codes; + } + memcpy(&m->scan_coding_info[i], &sci, sizeof(sci)); + } +} + +size_t RestartIntervalForScan(j_compress_ptr cinfo, size_t scan_index) { + if (cinfo->restart_in_rows <= 0) { + return cinfo->restart_interval; + } else { + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const bool is_interleaved = (scan_info->comps_in_scan > 1); + jpeg_component_info* base_comp = + &cinfo->comp_info[scan_info->component_index[0]]; + const int h_group = is_interleaved ? 1 : base_comp->h_samp_factor; + int MCUs_per_row = + DivCeil(cinfo->image_width * h_group, 8 * cinfo->max_h_samp_factor); + return std::min(MCUs_per_row * cinfo->restart_in_rows, 65535u); + } +} + +void OptimizeHuffmanCodes(j_compress_ptr cinfo, bool* is_baseline) { + jpeg_comp_master* m = cinfo->master; + // Gather histograms. + size_t num_histo = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + num_histo += cinfo->scan_info[i].comps_in_scan; + } + std::vector dc_histograms(num_histo); + std::vector ac_histograms(num_histo); + ProcessJpeg(cinfo, &dc_histograms, &ac_histograms); + + // Cluster DC histograms. + JpegClusteredHistograms dc_clusters; + ClusterJpegHistograms(dc_histograms.data(), dc_histograms.size(), + &dc_clusters); + + // Cluster AC histograms. + JpegClusteredHistograms ac_clusters; + ClusterJpegHistograms(ac_histograms.data(), ac_histograms.size(), + &ac_clusters); + + // Add the first 4 DC and AC histograms in the first DHT segment. + std::vector dc_slot_histograms; + std::vector ac_slot_histograms; + m->huffman_codes = Allocate(cinfo, num_histo, JPOOL_IMAGE); + for (size_t i = 0; i < dc_clusters.histograms.size(); ++i) { + if (i >= 4) break; + JXL_ASSERT(dc_clusters.slot_ids[i] == i); + AddJpegHuffmanCode(dc_clusters.histograms[i], i, m->huffman_codes, + &m->num_huffman_codes); + dc_slot_histograms.push_back(i); + } + for (size_t i = 0; i < ac_clusters.histograms.size(); ++i) { + if (i >= 4) break; + JXL_ASSERT(ac_clusters.slot_ids[i] == i); + AddJpegHuffmanCode(ac_clusters.histograms[i], 0x10 + i, m->huffman_codes, + &m->num_huffman_codes); + ac_slot_histograms.push_back(i); + } + + // Set the Huffman table indexes in the scan_infos and emit additional DHT + // segments if necessary. + size_t histogram_id = 0; + size_t num_huffman_codes_sent = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + ScanCodingInfo sci = {}; + for (int c = 0; c < cinfo->scan_info[i].comps_in_scan; ++c) { + SetJpegHuffmanCode(dc_clusters, histogram_id, 0, dc_slot_histograms, + &sci.dc_tbl_idx[c], is_baseline, m->huffman_codes, + &m->num_huffman_codes); + SetJpegHuffmanCode(ac_clusters, histogram_id, 0x10, ac_slot_histograms, + &sci.ac_tbl_idx[c], is_baseline, m->huffman_codes, + &m->num_huffman_codes); + ++histogram_id; + } + sci.num_huffman_codes = m->num_huffman_codes - num_huffman_codes_sent; + num_huffman_codes_sent = m->num_huffman_codes; + memcpy(&m->scan_coding_info[i], &sci, sizeof(sci)); + } +} + +} // namespace jpegli -- cgit v1.2.3