diff options
Diffstat (limited to 'third_party/jpeg-xl/lib/jpegli/encode_finish.cc')
-rw-r--r-- | third_party/jpeg-xl/lib/jpegli/encode_finish.cc | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jpegli/encode_finish.cc b/third_party/jpeg-xl/lib/jpegli/encode_finish.cc new file mode 100644 index 0000000000..955676bdee --- /dev/null +++ b/third_party/jpeg-xl/lib/jpegli/encode_finish.cc @@ -0,0 +1,230 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/encode_finish.h" + +#include <cmath> +#include <limits> + +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/quant.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc" +#include <hwy/foreach_target.h> +#include <hwy/highway.h> + +#include "lib/jpegli/dct-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::GetLane; + +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); +using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>; + +void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength, + const float* zero_bias_offset, + const float* zero_bias_mul) { + D d; + DI di; + DI16 di16; + const auto aq_mul = Set(d, aq_strength); + for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) { + const auto in = Load(di16, block + k); + const auto val = ConvertTo(d, PromoteTo(di, in)); + const auto q = Load(d, qmc + k); + const auto qval = Mul(val, q); + const auto zb_offset = Load(d, zero_bias_offset + k); + const auto zb_mul = Load(d, zero_bias_mul + k); + const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul)); + const auto nzero_mask = Ge(Abs(qval), threshold); + const auto iqval = IfThenElseZero(nzero_mask, Round(qval)); + Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k); + } +} + +float BlockError(const int16_t* block, const float* qmc, const float* iqmc, + const float aq_strength, const float* zero_bias_offset, + const float* zero_bias_mul) { + D d; + DI di; + DI16 di16; + auto err = Zero(d); + const auto scale = Set(d, 1.0 / 16); + const auto aq_mul = Set(d, aq_strength); + for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) { + const auto in = Load(di16, block + k); + const auto val = ConvertTo(d, PromoteTo(di, in)); + const auto q = Load(d, qmc + k); + const auto qval = Mul(val, q); + const auto zb_offset = Load(d, zero_bias_offset + k); + const auto zb_mul = Load(d, zero_bias_mul + k); + const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul)); + const auto nzero_mask = Ge(Abs(qval), threshold); + const auto iqval = IfThenElseZero(nzero_mask, Round(qval)); + const auto invq = Load(d, iqmc + k); + const auto rval = Mul(iqval, invq); + const auto diff = Mul(Sub(val, rval), scale); + err = Add(err, Mul(diff, diff)); + } + return GetLane(SumOfLanes(d, err)); +} + +void ComputeInverseWeights(const float* qmc, float* iqmc) { + for (int k = 0; k < 64; ++k) { + iqmc[k] = 1.0f / qmc[k]; + } +} + +float ComputePSNR(j_compress_ptr cinfo, int sampling) { + jpeg_comp_master* m = cinfo->master; + InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS); + double error = 0.0; + size_t num = 0; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const float* qmc = m->quant_mul[c]; + const int h_factor = m->h_factor[c]; + const int v_factor = m->v_factor[c]; + const float* zero_bias_offset = m->zero_bias_offset[c]; + const float* zero_bias_mul = m->zero_bias_mul[c]; + HWY_ALIGN float iqmc[64]; + ComputeInverseWeights(qmc, iqmc); + for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) { + JBLOCKARRAY ba = GetBlockRow(cinfo, c, by); + const float* qf = m->quant_field.Row(by * v_factor); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) { + error += BlockError(&ba[0][bx][0], qmc, iqmc, qf[bx * h_factor], + zero_bias_offset, zero_bias_mul); + num += DCTSIZE2; + } + } + } + return 4.3429448f * log(num / (error / 255. / 255.)); +} + +void ReQuantizeCoeffs(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const float* qmc = m->quant_mul[c]; + const int h_factor = m->h_factor[c]; + const int v_factor = m->v_factor[c]; + const float* zero_bias_offset = m->zero_bias_offset[c]; + const float* zero_bias_mul = m->zero_bias_mul[c]; + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = GetBlockRow(cinfo, c, by); + const float* qf = m->quant_field.Row(by * v_factor); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + ReQuantizeBlock(&ba[0][bx][0], qmc, qf[bx * h_factor], zero_bias_offset, + zero_bias_mul); + } + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { +namespace { +HWY_EXPORT(ComputePSNR); +HWY_EXPORT(ReQuantizeCoeffs); + +void ReQuantizeCoeffs(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo); +} + +float ComputePSNR(j_compress_ptr cinfo, int sampling) { + return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling); +} + +void UpdateDistance(j_compress_ptr cinfo, float distance) { + float distances[NUM_QUANT_TBLS] = {distance, distance, distance}; + SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true); +} + +float Clamp(float val, float minval, float maxval) { + return std::max(minval, std::min(maxval, val)); +} + +#define PSNR_SEARCH_DBG 0 + +float FindDistanceForPSNR(j_compress_ptr cinfo) { + constexpr int kMaxIters = 20; + const float psnr_target = cinfo->master->psnr_target; + const float tolerance = cinfo->master->psnr_tolerance; + const float min_dist = cinfo->master->min_distance; + const float max_dist = cinfo->master->max_distance; + float d = Clamp(1.0f, min_dist, max_dist); + for (int sampling : {4, 1}) { + float best_diff = std::numeric_limits<float>::max(); + float best_distance = 0.0f; + float best_psnr = 0.0; + float dmin = min_dist; + float dmax = max_dist; + bool found_lower_bound = false; + bool found_upper_bound = false; + for (int i = 0; i < kMaxIters; ++i) { + UpdateDistance(cinfo, d); + float psnr = ComputePSNR(cinfo, sampling); + if (psnr > psnr_target) { + dmin = d; + found_lower_bound = true; + } else { + dmax = d; + found_upper_bound = true; + } +#if (PSNR_SEARCH_DBG > 1) + printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr); + if (found_upper_bound && found_lower_bound) { + printf(" d-interval: [ %7.4f .. %7.4f ]", dmin, dmax); + } + printf("\n"); +#endif + float diff = std::abs(psnr - psnr_target); + if (diff < best_diff) { + best_diff = diff; + best_distance = d; + best_psnr = psnr; + } + if (diff < tolerance * psnr_target || dmin == dmax) { + break; + } + if (!found_lower_bound || !found_upper_bound) { + d *= std::exp(0.15f * (psnr - psnr_target)); + } else { + d = 0.5f * (dmin + dmax); + } + d = Clamp(d, min_dist, max_dist); + } + d = best_distance; + if (sampling == 1 && PSNR_SEARCH_DBG) { + printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d); + } + } + return d; +} + +} // namespace + +void QuantizetoPSNR(j_compress_ptr cinfo) { + float distance = FindDistanceForPSNR(cinfo); + UpdateDistance(cinfo, distance); + ReQuantizeCoeffs(cinfo); +} + +} // namespace jpegli +#endif // HWY_ONCE |