path: root/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc
Diffstat (limited to 'third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc')
-rw-r--r-- third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc | 291
1 file changed, 291 insertions(+), 0 deletions(-)
diff --git a/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc b/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc
new file mode 100644
index 0000000000..389b53598a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc
@@ -0,0 +1,291 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
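+// Encoder-side heuristics for AC coefficient orders: decide which
+// transform orders to customize, derive a custom order from per-band
+// zero counts, and encode the resulting permutations as Lehmer codes.
+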
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+#include <vector>
+
+#include <hwy/aligned_allocator.h>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+struct AuxOut;
+
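+// Returns two bitfields indexed by kStrategyOrder: the first has a bit
+// set for every transform order occurring in `rect`, the second for the
+// subset whose coefficient order is worth customizing (a reading of the
+// logic below).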
+std::pair<uint32_t, uint32_t> ComputeUsedOrders(
+ const SpeedTier speed, const AcStrategyImage& ac_strategy,
+ const Rect& rect) {
+ // kFalcon and faster use only DCT8 (strategy order 0), so both
+ // bitfields are simply 1.
+ if (speed >= SpeedTier::kFalcon) return {1, 1};
+
+ uint32_t ret = 0;
+ uint32_t ret_customize = 0;
+ size_t xsize_blocks = rect.xsize();
+ size_t ysize_blocks = rect.ysize();
+ // TODO(veluca): precompute when doing DCT.
+ for (size_t by = 0; by < ysize_blocks; ++by) {
+ AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+ for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+ int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
+ ret |= 1u << ord;
+ // Do not customize coefficient orders for blocks bigger than 32x32.
+ if (ord > 6) {
+ continue;
+ }
+ ret_customize |= 1u << ord;
+ }
+ }
+ // Use default orders for small images.
+ if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
+ return {ret, ret_customize};
+}
+
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+ const AcStrategyImage& ac_strategy,
+ const FrameDimensions& frame_dim, uint32_t& used_orders,
+ uint16_t used_acs, coeff_order_t* JXL_RESTRICT order) {
+ std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
+ // If compressing at high speed and only using 8x8 DCTs, only consider a
+ // subset of blocks.
+ double block_fraction = 1.0;
+ // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
+ // encoding significantly less dense.
+ if (speed >= SpeedTier::kSquirrel && used_orders == 1) {
+ block_fraction = 0.5f;
+ }
+ // No need to compute number of zero coefficients if all orders are the
+ // default.
+ if (used_orders != 0) {
+ uint64_t threshold =
+ (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
+ uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
+ static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
+ // Xorshift128+ adapted from xorshift128+-inl.h
+ auto use_sample = [&]() {
+ auto s1 = s[0];
+ const auto s0 = s[1];
+ const auto bits = s1 + s0; // b, c
+ s[0] = s0;
+ s1 ^= s1 << 23;
+ s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+ s[1] = s1;
+ return (bits >> 32) <= threshold;
+ };
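+ // `bits >> 32` is uniform over [0, 2^32), and `threshold` is
+ // (2^32 - 1) * block_fraction, so use_sample() accepts roughly a
+ // block_fraction share of blocks (e.g. about half of them when
+ // block_fraction is 0.5).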
+
+ // Count number of zero coefficients, separately for each DCT band.
+ // TODO(veluca): precompute when doing DCT.
+ for (size_t group_index = 0; group_index < frame_dim.num_groups;
+ group_index++) {
+ const size_t gx = group_index % frame_dim.xsize_groups;
+ const size_t gy = group_index / frame_dim.xsize_groups;
+ const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
+ kGroupDimInBlocks, kGroupDimInBlocks,
+ frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ ConstACPtr rows[3];
+ ACType type = acs.Type();
+ for (size_t c = 0; c < 3; c++) {
+ rows[c] = acs.PlaneRow(c, group_index, 0);
+ }
+ size_t ac_offset = 0;
+
+ // TODO(veluca): SIMDfy.
+ for (size_t by = 0; by < rect.ysize(); ++by) {
+ AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+ for (size_t bx = 0; bx < rect.xsize(); ++bx) {
+ AcStrategy acs = acs_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ size_t size = kDCTBlockSize << acs.log2_covered_blocks();
+ // Blocks skipped by sampling still occupy `size` coefficients in the
+ // plane, so the offset must advance past them.
+ if (!use_sample()) {
+ ac_offset += size;
+ continue;
+ }
+ for (size_t c = 0; c < 3; ++c) {
+ const size_t order_offset =
+ CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
+ if (type == ACType::k16) {
+ for (size_t k = 0; k < size; k++) {
+ bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
+ num_zeros[order_offset + k] += is_zero ? 1 : 0;
+ }
+ } else {
+ for (size_t k = 0; k < size; k++) {
+ bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
+ num_zeros[order_offset + k] += is_zero ? 1 : 0;
+ }
+ }
+ // Ensure LLFs are first in the order: a count of -1 quantizes to the
+ // lowest bucket, and the stable sort below keeps these low-frequency
+ // positions (which lead the natural order) at the front.
+ size_t cx = acs.covered_blocks_x();
+ size_t cy = acs.covered_blocks_y();
+ CoefficientLayout(&cy, &cx);
+ for (size_t iy = 0; iy < cy; iy++) {
+ for (size_t ix = 0; ix < cx; ix++) {
+ num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
+ }
+ }
+ }
+ ac_offset += size;
+ }
+ }
+ }
+ }
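+ // For each distinct order, sort coefficient positions by their
+ // quantized zero counts: positions that are rarely zero move to the
+ // front, positions that are almost always zero move to the back,
+ // which tends to make the coefficient tokens cheaper to encode.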
+ struct PosAndCount {
+ uint32_t pos;
+ uint32_t count;
+ };
+ auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);
+
+ std::vector<coeff_order_t> natural_order_buffer;
+
+ uint16_t computed = 0;
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ uint8_t ord = kStrategyOrder[o];
+ if (computed & (1 << ord)) continue;
+ computed |= 1 << ord;
+ AcStrategy acs = AcStrategy::FromRawStrategy(o);
+ size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
+
+ // Do nothing for transforms that don't appear.
+ if ((1 << ord) & ~used_acs) continue;
+
+ if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
+ acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());
+
+ // Ensure natural coefficient order is not permuted if the order is
+ // not transmitted.
+ if ((1 << ord) & ~used_orders) {
+ for (size_t c = 0; c < 3; c++) {
+ size_t offset = CoeffOrderOffset(ord, c);
+ JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+ memcpy(&order[offset], natural_order_buffer.data(),
+ sz * sizeof(*order));
+ }
+ continue;
+ }
+
+ bool is_nondefault = false;
+ for (uint8_t c = 0; c < 3; c++) {
+ // Apply zig-zag order.
+ PosAndCount* pos_and_val = mem.get();
+ size_t offset = CoeffOrderOffset(ord, c);
+ JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+ float inv_sqrt_sz = 1.0f / std::sqrt(sz);
+ for (size_t i = 0; i < sz; ++i) {
+ size_t pos = natural_order_buffer[i];
+ pos_and_val[i].pos = pos;
+ // The exact count does not matter: quantize the number of zeros so
+ // that similar counts compare equal and the order stays closer to
+ // the default, i.e. less permuted.
+ pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
+ }
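+ // E.g. for an 8x8 block (sz = 64, inv_sqrt_sz = 1/8) zero counts are
+ // bucketed in groups of 8: counts 0..7 map to 0, 8..15 to 1, etc.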
+
+ // Stable sort, so elements with the same quantized count keep their
+ // relative (natural) order.
+ auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
+ return a.count < b.count;
+ };
+ std::stable_sort(pos_and_val, pos_and_val + sz, comparator);
+
+ // Grab indices.
+ for (size_t i = 0; i < sz; ++i) {
+ order[offset + i] = pos_and_val[i].pos;
+ is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
+ }
+ }
+ if (!is_nondefault) {
+ used_orders &= ~(1 << ord);
+ }
+ }
+}
+
+namespace {
+
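+// Encodes a permutation as its Lehmer code, with trailing zeros dropped.
+// A small worked example, assuming the usual Lehmer-code definition
+// (lehmer[i] counts later entries smaller than order[i]): for
+// order = {2, 0, 1} and skip = 0 the code is {2, 0, 0}; the trailing
+// zeros shrink `end` to 1, so one length token (1) and one value
+// token (2) are emitted.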
+void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+ size_t size, std::vector<Token>* tokens) {
+ std::vector<LehmerT> lehmer(size);
+ std::vector<uint32_t> temp(size + 1);
+ ComputeLehmerCode(order, temp.data(), size, lehmer.data());
+ size_t end = size;
+ while (end > skip && lehmer[end - 1] == 0) {
+ --end;
+ }
+ tokens->emplace_back(CoeffOrderContext(size), end - skip);
+ uint32_t last = 0;
+ for (size_t i = skip; i < end; ++i) {
+ tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
+ last = lehmer[i];
+ }
+}
+
+} // namespace
+
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+ size_t size, BitWriter* writer, int layer,
+ AuxOut* aux_out) {
+ std::vector<std::vector<Token>> tokens(1);
+ TokenizePermutation(order, skip, size, &tokens[0]);
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
+ &codes, &context_map, writer, layer, aux_out);
+ WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+namespace {
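+// Re-expresses `order` relative to the natural (zig-zag) coefficient
+// order before tokenizing: if the custom order equals the natural one,
+// order_zigzag is the identity permutation, whose Lehmer code is all
+// zeros and encodes to a single length token.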
+void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
+ std::vector<Token>* tokens, coeff_order_t* order_zigzag,
+ std::vector<coeff_order_t>& natural_order_lut) {
+ const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+ const size_t size = kDCTBlockSize * llf;
+ for (size_t i = 0; i < size; ++i) {
+ order_zigzag[i] = natural_order_lut[order[i]];
+ }
+ TokenizePermutation(order_zigzag, llf, size, tokens);
+}
+} // namespace
+
+void EncodeCoeffOrders(uint16_t used_orders,
+ const coeff_order_t* JXL_RESTRICT order,
+ BitWriter* writer, size_t layer,
+ AuxOut* JXL_RESTRICT aux_out) {
+ auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+ uint16_t computed = 0;
+ std::vector<std::vector<Token>> tokens(1);
+ std::vector<coeff_order_t> natural_order_lut;
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ uint8_t ord = kStrategyOrder[o];
+ if (computed & (1 << ord)) continue;
+ computed |= 1 << ord;
+ if ((used_orders & (1 << ord)) == 0) continue;
+ AcStrategy acs = AcStrategy::FromRawStrategy(o);
+ const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+ const size_t size = kDCTBlockSize * llf;
+ if (natural_order_lut.size() < size) natural_order_lut.resize(size);
+ acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
+ for (size_t c = 0; c < 3; c++) {
+ EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &tokens[0],
+ mem.get(), natural_order_lut);
+ }
+ }
+ // Do not write anything if no order is used.
+ if (used_orders != 0) {
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
+ &codes, &context_map, writer, layer, aux_out);
+ WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+ }
+}
+
+} // namespace jxl