From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann
---
 third_party/jpeg-xl/lib/jxl/dec_cache.h | 265 ++++++++++++++++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 third_party/jpeg-xl/lib/jxl/dec_cache.h

(limited to 'third_party/jpeg-xl/lib/jxl/dec_cache.h')

diff --git a/third_party/jpeg-xl/lib/jxl/dec_cache.h b/third_party/jpeg-xl/lib/jxl/dec_cache.h
new file mode 100644
index 0000000000..d4cc7a1957
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_cache.h
@@ -0,0 +1,265 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Decoder-side state and scratch buffers: the output-callback bundle, the
+// description of an image output, the per-frame decoder state and the
+// per-group decoding cache.
+
+#ifndef LIB_JXL_DEC_CACHE_H_
+#define LIB_JXL_DEC_CACHE_H_
+
+// NOTE(review): the targets of the bare `#include` lines below and several
+// template argument lists further down (e.g. `std::unique_ptr upsampler8x`,
+// `std::vector> context_map`, `hwy::AllocateAligned(...)`) are missing in
+// this rendering of the patch -- presumably everything between angle brackets
+// was stripped. Restore them from the upstream file before applying; the
+// header does not compile as shown.
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include // HWY_ALIGN_MAX
+#include
+#include
+
+#include "hwy/aligned_allocator.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/common.h" // kMaxNumPasses
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/frame_dimensions.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+
+namespace jxl {
+
+// Constants for the EPF sigma image. PassesDecoderState::Init() enlarges the
+// sigma image by 2 * kSigmaPadding in each dimension. kSigmaBorder is not
+// referenced anywhere in this header.
+constexpr size_t kSigmaBorder = 1;
+constexpr size_t kSigmaPadding = 2;
+
+// Bundle of the three image-out callbacks (init / run / destroy) together with
+// the opaque pointer that is handed to `init`. A default-constructed instance
+// holds only null pointers.
+struct PixelCallback {
+  PixelCallback() = default;
+  // Stores the given callbacks and opaque pointer. When JXL_ENABLE_ASSERT is
+  // set, asserts that the three callbacks are either all null or all non-null.
+  PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run,
+                JxlImageOutDestroyCallback destroy, void* init_opaque)
+      : init(init), run(run), destroy(destroy), init_opaque(init_opaque) {
+#if JXL_ENABLE_ASSERT
+    const bool has_init = init != nullptr;
+    const bool has_run = run != nullptr;
+    const bool has_destroy = destroy != nullptr;
+    JXL_ASSERT(has_init == has_run && has_run == has_destroy);
+#endif
+  }
+
+  // Returns true if a `run` callback has been set.
+  bool IsPresent() const { return run != nullptr; }
+
+  // Forwards to the `init` callback, passing `init_opaque` as first argument,
+  // and returns its result. `init` is invoked without a null check, so this
+  // must only be called on an instance that actually holds callbacks.
+  void* Init(size_t num_threads, size_t num_pixels) const {
+    return init(init_opaque, num_threads, num_pixels);
+  }
+
+  JxlImageOutInitCallback init = nullptr;
+  JxlImageOutRunCallback run = nullptr;
+  JxlImageOutDestroyCallback destroy = nullptr;
+  // Passed verbatim as the first argument of `init`.
+  void* init_opaque = nullptr;
+};
+
+// Describes one image output: its pixel format plus the callback and/or buffer
+// that receives the pixels. None of the plain members has a default value.
+struct ImageOutput {
+  // Pixel format of the output pixels, used for buffer and callback output.
+  JxlPixelFormat format;
+  // Output bit depth for unsigned data types, used for float to int conversion.
+  size_t bits_per_sample;
+  // Callback for line-by-line output.
+  PixelCallback callback;
+  // Pixel buffer for image output.
+  void* buffer;
+  // Size of `buffer` (presumably in bytes -- TODO confirm against callers).
+  size_t buffer_size;
+  // Length of a row of image_buffer in bytes (based on oriented width).
+  size_t stride;
+};
+
+// Per-frame decoder state. All the images here should be accessed through a
+// group rect (either with block units or pixel units).
+struct PassesDecoderState {
+  // Shared state owned by this object; `shared` points at it by default.
+  PassesSharedState shared_storage;
+  // Allows avoiding copies for encoder loop.
+  const PassesSharedState* JXL_RESTRICT shared = &shared_storage;
+
+  // 8x upsampling stage for DC.
+  std::unique_ptr upsampler8x;
+
+  // For ANS decoding.
+  std::vector code;
+  std::vector> context_map;
+
+  // Multiplier to be applied to the quant matrices of the x channel.
+  // Set by Init() from frame_header.x_qm_scale.
+  float x_dm_multiplier;
+  // Counterpart of x_dm_multiplier, set by Init() from frame_header.b_qm_scale.
+  float b_dm_multiplier;
+
+  // Sigma values for EPF.
+  ImageF sigma;
+
+  // Image dimensions before applying undo_orientation.
+  size_t width;
+  size_t height;
+  // Primary output; Init() resets its callback and buffer.
+  ImageOutput main_output;
+  // Additional outputs; cleared by Init().
+  std::vector extra_output;
+
+  // Whether to use int16 float-XYB-to-uint8-srgb conversion.
+  bool fast_xyb_srgb8_conversion;
+
+  // If true, the RGBA output will be unpremultiplied before writing to the
+  // output.
+  bool unpremul_alpha;
+
+  // The render pipeline will apply this orientation to bring the image to the
+  // intended display orientation.
+  Orientation undo_orientation;
+
+  // Used for seeding noise.
+  size_t visible_frame_index = 0;
+  size_t nonvisible_frame_index = 0;
+
+  // Keep track of the transform types used.
+  // Read as a bitmask by InitForAC(): bit `o` corresponds to strategy `o`.
+  std::atomic used_acs{0};
+
+  // Storage for coefficients if in "accumulate" mode.
+  std::unique_ptr coefficients = make_unique>(0, 0);
+
+  // Rendering pipeline.
+  std::unique_ptr render_pipeline;
+
+  // Storage for the current frame if it can be referenced by future frames.
+  ImageBundle frame_storage_for_referencing;
+
+  // Options passed by value to PreparePipeline(). The flags have no default
+  // values, so callers have to set all of them.
+  struct PipelineOptions {
+    bool use_slow_render_pipeline;
+    bool coalescing;
+    bool render_spotcolors;
+    bool render_noise;
+  };
+
+  // Declared here only; the definition is not part of this header.
+  Status PreparePipeline(const FrameHeader& frame_header, ImageBundle* decoded,
+                         PipelineOptions options);
+
+  // Information for colour conversions.
+  OutputEncodingInfo output_encoding_info;
+
+  // Initializes decoder-specific structures using information from *shared.
+  // In detail:
+  //  - sets x_dm_multiplier / b_dm_multiplier to (1 / 1.25)^(scale - 2), with
+  //    scale taken from frame_header.x_qm_scale / frame_header.b_qm_scale;
+  //  - resets main_output's callback and buffer and clears extra_output;
+  //  - resets fast_xyb_srgb8_conversion, unpremul_alpha, undo_orientation and
+  //    used_acs;
+  //  - replaces upsampler8x with the stage returned by GetUpsamplingStage();
+  //  - only if frame_header.loop_filter.epf_iters > 0, allocates `sigma` with
+  //    the frame's block dimensions plus 2 * kSigmaPadding in each dimension.
+  // Always returns true.
+  Status Init(const FrameHeader& frame_header) {
+    x_dm_multiplier = std::pow(1 / (1.25f), frame_header.x_qm_scale - 2.0f);
+    b_dm_multiplier = std::pow(1 / (1.25f), frame_header.b_qm_scale - 2.0f);
+
+    main_output.callback = PixelCallback();
+    main_output.buffer = nullptr;
+    extra_output.clear();
+
+    fast_xyb_srgb8_conversion = false;
+    unpremul_alpha = false;
+    undo_orientation = Orientation::kIdentity;
+
+    used_acs = 0;
+
+    upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3);
+    if (frame_header.loop_filter.epf_iters > 0) {
+      sigma = ImageF(shared->frame_dim.xsize_blocks + 2 * kSigmaPadding,
+                     shared->frame_dim.ysize_blocks + 2 * kSigmaPadding);
+    }
+    return true;
+  }
+
+  // Initialize the decoder state after all of DC is decoded.
+  // Sets shared_storage.coeff_order_size to the largest
+  // kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize over the strategies whose
+  // bit is set in used_acs (0 if none is set), then grows
+  // shared_storage.coeff_orders to num_passes * coeff_order_size entries if it
+  // is currently smaller; it is never shrunk. `pool` is not used by this
+  // function. Always returns true.
+  Status InitForAC(size_t num_passes, ThreadPool* pool) {
+    shared_storage.coeff_order_size = 0;
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+      // Skip strategies that were not recorded as used.
+      if (((1 << o) & used_acs) == 0) continue;
+      uint8_t ord = kStrategyOrder[o];
+      shared_storage.coeff_order_size =
+          std::max(kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize,
+                   shared_storage.coeff_order_size);
+    }
+    size_t sz = num_passes * shared_storage.coeff_order_size;
+    if (sz > shared_storage.coeff_orders.size()) {
+      shared_storage.coeff_orders.resize(sz);
+    }
+    return true;
+  }
+};
+
+// Temp images required for decoding a single group. Reduces memory allocations
+// for large images because we only initialize min(#threads, #groups) instances.
+struct GroupDecCache {
+  // Prepares the buffers needed to decode a group with `num_passes` passes.
+  // `used_acs` is a bitmask in which bit `o` selects raw strategy `o`.
+  // Allocations made by earlier calls are reused: a num_nzeroes image is only
+  // created while its xsize() is 0, and the coefficient buffers are only
+  // reallocated when the largest selected block area exceeds the area they
+  // were last sized for. The public scratch pointers are refreshed on every
+  // call.
+  void InitOnce(size_t num_passes, size_t used_acs) {
+    for (size_t i = 0; i < num_passes; i++) {
+      if (num_nzeroes[i].xsize() == 0) {
+        // Allocate enough for a whole group - partial groups on the
+        // right/bottom border just use a subset. The valid size is passed via
+        // Rect.
+
+        num_nzeroes[i] = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+      }
+    }
+    // Largest area among the selected strategies, computed as
+    // covered_blocks_x * covered_blocks_y * kDCTBlockSize.
+    size_t max_block_area = 0;
+
+    for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+      AcStrategy acs = AcStrategy::FromRawStrategy(o);
+      if ((used_acs & (1 << o)) == 0) continue;
+      size_t area =
+          acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize;
+      max_block_area = std::max(area, max_block_area);
+    }
+
+    if (max_block_area > max_block_area_) {
+      max_block_area_ = max_block_area;
+      // 7x float blocks in total: the first 3x hold the dequantized
+      // coefficients, the remaining 4x follow at scratch_space (see below) and
+      // serve as scratch space for transforms.
+      float_memory_ = hwy::AllocateAligned(max_block_area_ * 7);
+      // We need 3x int32 or int16 blocks for quantized coefficients.
+      int32_memory_ = hwy::AllocateAligned(max_block_area_ * 3);
+      int16_memory_ = hwy::AllocateAligned(max_block_area_ * 3);
+    }
+
+    // Scratch space starts right after the 3 coefficient blocks.
+    dec_group_block = float_memory_.get();
+    scratch_space = dec_group_block + max_block_area_ * 3;
+    dec_group_qblock = int32_memory_.get();
+    dec_group_qblock16 = int16_memory_.get();
+  }
+
+  // Allocates dc_buffer the first time it is called (i.e. while its xsize() is
+  // still 0); later calls leave the existing buffer untouched.
+  void InitDCBufferOnce() {
+    if (dc_buffer.xsize() == 0) {
+      dc_buffer = ImageF(kGroupDimInBlocks + kRenderPipelineXOffset * 2,
+                         kGroupDimInBlocks + 4);
+    }
+  }
+
+  // Scratch space used by DecGroupImpl().
+  // These raw pointers are set by InitOnce() and point into the private
+  // buffers declared at the end of this struct.
+  float* dec_group_block;
+  int32_t* dec_group_qblock;
+  int16_t* dec_group_qblock16;
+
+  // For TransformToPixels.
+  float* scratch_space;
+  // Note that scratch_space is never used at the same time as dec_group_qblock.
+  // Moreover, only one of dec_group_qblock and dec_group_qblock16 is ever used
+  // (NOTE(review): the original wording named only dec_group_qblock16;
+  // presumably the int32/int16 pair is meant -- confirm).
+  // TODO(veluca): figure out if we can save allocations.
+
+  // AC decoding: one image per pass, allocated lazily by InitOnce().
+  Image3I num_nzeroes[kMaxNumPasses];
+
+  // Buffer for DC upsampling.
+  ImageF dc_buffer;
+
+ private:
+  // Backing storage for the scratch pointers above.
+  hwy::AlignedFreeUniquePtr float_memory_;
+  hwy::AlignedFreeUniquePtr int32_memory_;
+  hwy::AlignedFreeUniquePtr int16_memory_;
+  // Largest block area the buffers above have been sized for so far.
+  size_t max_block_area_ = 0;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_CACHE_H_
--
cgit v1.2.3