Diffstat (limited to 'third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc')
-rw-r--r--  third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc  274
1 file changed, 274 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc b/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc
new file mode 100644
index 0000000000..c634445e83
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc
@@ -0,0 +1,274 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
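+// (foreach_target.h re-includes this file once per enabled SIMD target, so
+// everything inside HWY_NAMESPACE below is compiled separately per target.)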
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+
+// Returns the number of non-zero coefficients (skipping the LLF).
+// We cannot rely on block[] being all-zero bits, so first truncate to integer.
+// Also writes the per-8x8 block nzeros starting at nzeros_pos.
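+// (The LLF values are the top-left cy x cx corner of the coefficient block,
+// covered_blocks entries in total; for a plain 8x8 block this is just the DC.
+// They are not tokenized as AC coefficients, hence excluded from the count.)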
+int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
+ const AcStrategy acs, const size_t covered_blocks,
+ const size_t log2_covered_blocks,
+ const int32_t* JXL_RESTRICT block,
+ const size_t nzeros_stride,
+ int32_t* JXL_RESTRICT nzeros_pos) {
+ const HWY_CAPPED(int32_t, kBlockDim) di;
+
+ const auto zero = Zero(di);
+ // Add FF..FF for every zero coefficient, negate to get #zeros.
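+ // (Each lane of VecFromMask is all-ones, i.e. -1 as int32, for a zero
+ // coefficient, so neg_sum_zero accumulates minus the number of zeros.)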
+ auto neg_sum_zero = zero;
+
+ {
+ // Mask sufficient for one row of coefficients.
+ HWY_ALIGN const int32_t
+ llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
+ -1, -1, -1, -1};
+ // First cx=1,2,4 elements are FF..FF, others 0.
+ const int32_t* llf_mask_pos =
+ llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
+
+ // Rows with LLF: mask out the LLF
+ for (size_t y = 0; y < cy; y++) {
+ for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+ const auto llf_mask = LoadU(di, llf_mask_pos + x);
+
+ // LLF counts as zero so we don't include it in nzeros.
+ const auto coef =
+ AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
+
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+ }
+
+ // Remaining rows: no mask
+ for (size_t y = cy; y < cy * kBlockDim; y++) {
+ for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+ const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // We want area - sum_zero, add because neg_sum_zero is already negated.
+ const int32_t nzeros =
+ int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero));
+
+ const int32_t shifted_nzeros = static_cast<int32_t>(
+ (nzeros + covered_blocks - 1) >> log2_covered_blocks);
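+ // Store the rounded-up per-8x8 average in every covered block, so that
+ // later blocks can predict their nonzero count from top/left neighbours.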
+ // Need non-canonicalized dimensions!
+ for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+ for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+ nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
+ }
+ }
+
+ return nzeros;
+}
+
+// Specialization for 8x8, where only top-left is LLF/DC.
+// About 1% overall speedup vs. NumNonZeroExceptLLF.
+int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
+ int32_t* JXL_RESTRICT nzeros_pos) {
+ const HWY_CAPPED(int32_t, kBlockDim) di;
+
+ const auto zero = Zero(di);
+ // Add FF..FF for every zero coefficient, negate to get #zeros.
+ auto neg_sum_zero = zero;
+
+ {
+ // First row has DC, so mask
+ const size_t y = 0;
+ HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
+
+ for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+ const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+ // DC counts as zero so we don't include it in nzeros.
+ const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
+
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // Remaining rows: no mask
+ for (size_t y = 1; y < kBlockDim; y++) {
+ for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+ const auto coef = Load(di, &block[y * kBlockDim + x]);
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+ const int32_t nzeros =
+ int32_t(kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero));
+
+ *nzeros_pos = nzeros;
+
+ return nzeros;
+}
+
+// The number of nonzeros of each block is predicted from the top and the left
+// blocks, with appropriate scaling to take into account the number of blocks
+// of
+// each strategy. The predicted number of nonzeros divided by two is used as a
+// context; if this number is above 63, a specific context is used. If the
+// number of nonzeros of a strategy is above 63, it is written directly using a
+// fixed number of bits (that depends on the size of the strategy).
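+// As tokenized below, each block contributes one token carrying its nonzero
+// count (in the context derived from the prediction), followed by one token
+// per coefficient in the given coefficient order (LLF positions skipped),
+// stopping once all nonzeros have been emitted; the rest are implicitly zero.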
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+ const Rect& rect,
+ const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+ const AcStrategyImage& ac_strategy,
+ YCbCrChromaSubsampling cs,
+ Image3I* JXL_RESTRICT tmp_num_nzeroes,
+ std::vector<Token>* JXL_RESTRICT output,
+ const ImageB& qdc, const ImageI& qf,
+ const BlockCtxMap& block_ctx_map) {
+ const size_t xsize_blocks = rect.xsize();
+ const size_t ysize_blocks = rect.ysize();
+
+ // TODO(user): update the estimate: usually fewer coefficients are used.
+ output->reserve(output->size() +
+ 3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
+
+ size_t offset[3] = {};
+ const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
+ for (size_t by = 0; by < ysize_blocks; ++by) {
+ size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
+ by >> cs.VShift(2)};
+ int32_t* JXL_RESTRICT row_nzeros[3] = {
+ tmp_num_nzeroes->PlaneRow(0, sby[0]),
+ tmp_num_nzeroes->PlaneRow(1, sby[1]),
+ tmp_num_nzeroes->PlaneRow(2, sby[2]),
+ };
+ const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
+ sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
+ sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
+ sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
+ };
+ const uint8_t* JXL_RESTRICT row_qdc =
+ qdc.ConstRow(rect.y0() + by) + rect.x0();
+ const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
+ AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+ for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+ AcStrategy acs = acs_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
+ bx >> cs.HShift(2)};
+ size_t cx = acs.covered_blocks_x();
+ size_t cy = acs.covered_blocks_y();
+ const size_t covered_blocks = cx * cy; // = #LLF coefficients
+ const size_t log2_covered_blocks =
+ Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
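+ // (covered_blocks is a power of two, so this trailing-zero count is its
+ // log2; it lets the per-8x8 averaging divide with a shift.)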
+ const size_t size = covered_blocks * kDCTBlockSize;
+
+ CoefficientLayout(&cy, &cx); // swap cx/cy to canonical order
+
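+ // Channel 1 (Y in XYB order) is tokenized first, then X (0) and B (2).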
+ for (int c : {1, 0, 2}) {
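+ // With chroma subsampling, several (bx, by) positions map to the same
+ // subsampled block; only the position that maps back exactly emits tokens
+ // for this channel.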
+ if (sbx[c] << cs.HShift(c) != bx) continue;
+ if (sby[c] << cs.VShift(c) != by) continue;
+ const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
+
+ int32_t nzeros =
+ (covered_blocks == 1)
+ ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
+ : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
+ log2_covered_blocks, block, nzeros_stride,
+ row_nzeros[c] + sbx[c]);
+
+ int ord = kStrategyOrder[acs.RawStrategy()];
+ const coeff_order_t* JXL_RESTRICT order =
+ &orders[CoeffOrderOffset(ord, c)];
+
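+ // (The 32 below is the default prediction used when neither a top nor a
+ // left neighbour is available.)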
+ int32_t predicted_nzeros =
+ PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
+ size_t block_ctx =
+ block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
+ const int32_t nzero_ctx =
+ block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
+
+ output->emplace_back(nzero_ctx, nzeros);
+ const size_t histo_offset =
+ block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+ // Skip LLF.
+ size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
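+ // (prev seeds the 'previous coefficient was nonzero' state consumed by
+ // ZeroDensityContext: 0 for relatively dense blocks, 1 for sparse ones.)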
+ for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+ int32_t coeff = block[order[k]];
+ size_t ctx =
+ histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+ log2_covered_blocks, prev);
+ uint32_t u_coeff = PackSigned(coeff);
+ output->emplace_back(ctx, u_coeff);
+ prev = coeff != 0;
+ nzeros -= prev;
+ }
+ JXL_DASSERT(nzeros == 0);
+ offset[c] += size;
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(TokenizeCoefficients);
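+// Thin wrapper: HWY_DYNAMIC_DISPATCH picks, at run time, the instantiation of
+// TokenizeCoefficients compiled for the best SIMD target available.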
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+ const Rect& rect,
+ const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+ const AcStrategyImage& ac_strategy,
+ YCbCrChromaSubsampling cs,
+ Image3I* JXL_RESTRICT tmp_num_nzeroes,
+ std::vector<Token>* JXL_RESTRICT output,
+ const ImageB& qdc, const ImageI& qf,
+ const BlockCtxMap& block_ctx_map) {
+ return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
+ orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
+ block_ctx_map);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE