From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- .../lib/jxl/modular/transform/enc_palette.cc | 606 +++++++++++++++++++++ .../lib/jxl/modular/transform/enc_palette.h | 22 + .../jpeg-xl/lib/jxl/modular/transform/enc_rct.cc | 73 +++ .../jpeg-xl/lib/jxl/modular/transform/enc_rct.h | 17 + .../lib/jxl/modular/transform/enc_squeeze.cc | 141 +++++ .../lib/jxl/modular/transform/enc_squeeze.h | 20 + .../lib/jxl/modular/transform/enc_transform.cc | 46 ++ .../lib/jxl/modular/transform/enc_transform.h | 22 + .../jpeg-xl/lib/jxl/modular/transform/palette.cc | 176 ++++++ .../jpeg-xl/lib/jxl/modular/transform/palette.h | 129 +++++ .../jpeg-xl/lib/jxl/modular/transform/rct.cc | 153 ++++++ .../jpeg-xl/lib/jxl/modular/transform/rct.h | 20 + .../jpeg-xl/lib/jxl/modular/transform/squeeze.cc | 478 ++++++++++++++++ .../jpeg-xl/lib/jxl/modular/transform/squeeze.h | 90 +++ .../jpeg-xl/lib/jxl/modular/transform/transform.cc | 98 ++++ .../jpeg-xl/lib/jxl/modular/transform/transform.h | 148 +++++ 16 files changed, 2239 insertions(+) create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/palette.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/rct.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/transform.h (limited to 'third_party/jpeg-xl/lib/jxl/modular/transform') diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc new file mode 100644 index 0000000000..bc31445bc5 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc @@ -0,0 +1,606 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/enc_palette.h" + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/enc_transform.h" +#include "lib/jxl/modular/transform/palette.h" + +namespace jxl { + +namespace palette_internal { + +static constexpr bool kEncodeToHighQualityImplicitPalette = true; + +// Inclusive. +static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1); + +float ColorDistance(const std::vector &JXL_RESTRICT a, + const std::vector &JXL_RESTRICT b) { + JXL_ASSERT(a.size() == b.size()); + float distance = 0; + float ave3 = 0; + if (a.size() >= 3) { + ave3 = (a[0] + b[0] + a[1] + b[1] + a[2] + b[2]) * (1.21f / 3.0f); + } + float sum_a = 0, sum_b = 0; + for (size_t c = 0; c < a.size(); ++c) { + const float difference = + static_cast(a[c]) - static_cast(b[c]); + float weight = c == 0 ? 3 : c == 1 ? 5 : 2; + if (c < 3 && (a[c] + b[c] >= ave3)) { + const float add_w[3] = { + 1.15, + 1.15, + 1.12, + }; + weight += add_w[c]; + if (c == 2 && ((a[2] + b[2]) < 1.22 * ave3)) { + weight -= 0.5; + } + } + distance += difference * difference * weight * weight; + const int sum_weight = c == 0 ? 3 : c == 1 ? 5 : 1; + sum_a += a[c] * sum_weight; + sum_b += b[c] * sum_weight; + } + distance *= 4; + float sum_difference = sum_a - sum_b; + distance += sum_difference * sum_difference; + return distance; +} + +static int QuantizeColorToImplicitPaletteIndex( + const std::vector &color, const int palette_size, + const int bit_depth, bool high_quality) { + int index = 0; + if (high_quality) { + int multiplier = 1; + for (size_t c = 0; c < color.size(); c++) { + int quantized = ((kLargeCube - 1) * color[c] + (1 << (bit_depth - 1))) / + ((1 << bit_depth) - 1); + JXL_ASSERT((quantized % kLargeCube) == quantized); + index += quantized * multiplier; + multiplier *= kLargeCube; + } + return index + palette_size + kLargeCubeOffset; + } else { + int multiplier = 1; + for (size_t c = 0; c < color.size(); c++) { + int value = color[c]; + value -= 1 << (std::max(0, bit_depth - 3)); + value = std::max(0, value); + int quantized = ((kLargeCube - 1) * value + (1 << (bit_depth - 1))) / + ((1 << bit_depth) - 1); + JXL_ASSERT((quantized % kLargeCube) == quantized); + if (quantized > kSmallCube - 1) { + quantized = kSmallCube - 1; + } + index += quantized * multiplier; + multiplier *= kSmallCube; + } + return index + palette_size; + } +} + +} // namespace palette_internal + +int RoundInt(int value, int div) { // symmetric rounding around 0 + if (value < 0) return -RoundInt(-value, div); + return (value + div / 2) / div; +} + +struct PaletteIterationData { + static constexpr int kMaxDeltas = 128; + bool final_run = false; + std::vector deltas[3]; + std::vector delta_distances; + std::vector frequent_deltas[3]; + + // Populates `frequent_deltas` with items from `deltas` based on frequencies + // and color distances. + void FindFrequentColorDeltas(int num_pixels, int bitdepth) { + using pixel_type_3d = std::array; + std::map delta_frequency_map; + pixel_type bucket_size = 3 << std::max(0, bitdepth - 8); + // Store frequency weighted by delta distance from quantized value. + for (size_t i = 0; i < deltas[0].size(); ++i) { + pixel_type_3d delta = { + {RoundInt(deltas[0][i], bucket_size), + RoundInt(deltas[1][i], bucket_size), + RoundInt(deltas[2][i], bucket_size)}}; // a basic form of clustering + if (delta[0] == 0 && delta[1] == 0 && delta[2] == 0) continue; + delta_frequency_map[delta] += sqrt(sqrt(delta_distances[i])); + } + + const float delta_distance_multiplier = 1.0f / num_pixels; + + // Weigh frequencies by magnitude and normalize. + for (auto &delta_frequency : delta_frequency_map) { + std::vector current_delta = {delta_frequency.first[0], + delta_frequency.first[1], + delta_frequency.first[2]}; + float delta_distance = + sqrt(palette_internal::ColorDistance({0, 0, 0}, current_delta)) + 1; + delta_frequency.second *= delta_distance * delta_distance_multiplier; + } + + // Sort by weighted frequency. + using pixel_type_3d_frequency = std::pair; + std::vector sorted_delta_frequency_map( + delta_frequency_map.begin(), delta_frequency_map.end()); + std::sort( + sorted_delta_frequency_map.begin(), sorted_delta_frequency_map.end(), + [](const pixel_type_3d_frequency &a, const pixel_type_3d_frequency &b) { + return a.second > b.second; + }); + + // Store the top deltas. + for (auto &delta_frequency : sorted_delta_frequency_map) { + if (frequent_deltas[0].size() >= kMaxDeltas) break; + // Number obtained by optimizing on jyrki31 corpus: + if (delta_frequency.second < 17) break; + for (int c = 0; c < 3; ++c) { + frequent_deltas[c].push_back(delta_frequency.first[c] * bucket_size); + } + } + } +}; + +Status FwdPaletteIteration(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t &nb_colors, uint32_t &nb_deltas, + bool ordered, bool lossy, Predictor &predictor, + const weighted::Header &wp_header, + PaletteIterationData &palette_iteration_data) { + JXL_QUIET_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c)); + JXL_ASSERT(begin_c >= input.nb_meta_channels); + uint32_t nb = end_c - begin_c + 1; + + size_t w = input.channel[begin_c].w; + size_t h = input.channel[begin_c].h; + + if (!lossy && nb == 1) { + // Channel palette special case + if (nb_colors == 0) return false; + std::vector lookup; + pixel_type minval, maxval; + compute_minmax(input.channel[begin_c], &minval, &maxval); + size_t lookup_table_size = + static_cast(maxval) - static_cast(minval) + 1; + if (lookup_table_size > palette_internal::kMaxPaletteLookupTableSize) { + // a lookup table would use too much memory, instead use a slower approach + // with std::set + std::set chpalette; + pixel_type idx = 0; + for (size_t y = 0; y < h; y++) { + const pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + const bool new_color = chpalette.insert(p[x]).second; + if (new_color) { + idx++; + if (idx > (int)nb_colors) return false; + } + } + } + JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx); + Channel pch(idx, 1); + pch.hshift = -1; + pch.vshift = -1; + nb_colors = idx; + idx = 0; + pixel_type *JXL_RESTRICT p_palette = pch.Row(0); + for (pixel_type p : chpalette) { + p_palette[idx++] = p; + } + for (size_t y = 0; y < h; y++) { + pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + for (idx = 0; p[x] != p_palette[idx] && idx < (int)nb_colors; idx++) { + } + JXL_DASSERT(idx < (int)nb_colors); + p[x] = idx; + } + } + predictor = Predictor::Zero; + input.nb_meta_channels++; + input.channel.insert(input.channel.begin(), std::move(pch)); + + return true; + } + lookup.resize(lookup_table_size, 0); + pixel_type idx = 0; + for (size_t y = 0; y < h; y++) { + const pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + if (lookup[p[x] - minval] == 0) { + lookup[p[x] - minval] = 1; + idx++; + if (idx > (int)nb_colors) return false; + } + } + } + JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx); + Channel pch(idx, 1); + pch.hshift = -1; + pch.vshift = -1; + nb_colors = idx; + idx = 0; + pixel_type *JXL_RESTRICT p_palette = pch.Row(0); + for (size_t i = 0; i < lookup_table_size; i++) { + if (lookup[i]) { + p_palette[idx] = i + minval; + lookup[i] = idx; + idx++; + } + } + for (size_t y = 0; y < h; y++) { + pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) p[x] = lookup[p[x] - minval]; + } + predictor = Predictor::Zero; + input.nb_meta_channels++; + input.channel.insert(input.channel.begin(), std::move(pch)); + return true; + } + + Image quantized_input; + if (lossy) { + quantized_input = Image(w, h, input.bitdepth, nb); + for (size_t c = 0; c < nb; c++) { + CopyImageTo(input.channel[begin_c + c].plane, + &quantized_input.channel[c].plane); + } + } + + JXL_DEBUG_V( + 7, "Trying to represent channels %i-%i using at most a %i-color palette.", + begin_c, end_c, nb_colors); + nb_deltas = 0; + bool delta_used = false; + std::set> + candidate_palette; // ordered lexicographically + std::vector> candidate_palette_imageorder; + std::vector color(nb); + std::vector color_with_error(nb); + std::vector p_in(nb); + + if (lossy) { + palette_iteration_data.FindFrequentColorDeltas(w * h, input.bitdepth); + nb_deltas = palette_iteration_data.frequent_deltas[0].size(); + + // Count color frequency for colors that make a cross. + std::map, size_t> color_freq_map; + for (size_t y = 1; y + 1 < h; y++) { + for (uint32_t c = 0; c < nb; c++) { + p_in[c] = input.channel[begin_c + c].Row(y); + } + for (size_t x = 1; x + 1 < w; x++) { + for (uint32_t c = 0; c < nb; c++) { + color[c] = p_in[c][x]; + } + int offsets[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}}; + bool makes_cross = true; + for (int i = 0; i < 4 && makes_cross; ++i) { + int dx = offsets[i][0]; + int dy = offsets[i][1]; + for (uint32_t c = 0; c < nb && makes_cross; c++) { + if (input.channel[begin_c + c].Row(y + dy)[x + dx] != color[c]) { + makes_cross = false; + } + } + } + if (makes_cross) color_freq_map[color] += 1; + } + } + // Add colors satisfying frequency condition to the palette. + constexpr float kImageFraction = 0.01f; + size_t color_frequency_lower_bound = 5 + input.h * input.w * kImageFraction; + for (const auto &color_freq : color_freq_map) { + if (color_freq.second > color_frequency_lower_bound) { + candidate_palette.insert(color_freq.first); + candidate_palette_imageorder.push_back(color_freq.first); + } + } + } + + for (size_t y = 0; y < h; y++) { + for (uint32_t c = 0; c < nb; c++) { + p_in[c] = input.channel[begin_c + c].Row(y); + } + for (size_t x = 0; x < w; x++) { + if (lossy && candidate_palette.size() >= nb_colors) break; + for (uint32_t c = 0; c < nb; c++) { + color[c] = p_in[c][x]; + } + const bool new_color = candidate_palette.insert(color).second; + if (new_color) { + candidate_palette_imageorder.push_back(color); + } + if (candidate_palette.size() > nb_colors) { + return false; // too many colors + } + } + } + + nb_colors = nb_deltas + candidate_palette.size(); + JXL_DEBUG_V(6, "Channels %i-%i can be represented using a %i-color palette.", + begin_c, end_c, nb_colors); + + Channel pch(nb_colors, nb); + pch.hshift = -1; + pch.vshift = -1; + pixel_type *JXL_RESTRICT p_palette = pch.Row(0); + intptr_t onerow = pch.plane.PixelsPerRow(); + intptr_t onerow_image = input.channel[begin_c].plane.PixelsPerRow(); + const int bit_depth = std::min(input.bitdepth, 24); + + if (lossy) { + for (uint32_t i = 0; i < nb_deltas; i++) { + for (size_t c = 0; c < 3; c++) { + p_palette[c * onerow + i] = + palette_iteration_data.frequent_deltas[c][i]; + } + } + } + + int x = 0; + if (ordered) { + JXL_DEBUG_V(7, "Palette of %i colors, using lexicographic order", + nb_colors); + for (auto pcol : candidate_palette) { + JXL_DEBUG_V(9, " Color %i : ", x); + for (size_t i = 0; i < nb; i++) { + p_palette[nb_deltas + i * onerow + x] = pcol[i]; + } + for (size_t i = 0; i < nb; i++) { + JXL_DEBUG_V(9, "%i ", pcol[i]); + } + x++; + } + } else { + JXL_DEBUG_V(7, "Palette of %i colors, using image order", nb_colors); + for (auto pcol : candidate_palette_imageorder) { + JXL_DEBUG_V(9, " Color %i : ", x); + for (size_t i = 0; i < nb; i++) + p_palette[nb_deltas + i * onerow + x] = pcol[i]; + for (size_t i = 0; i < nb; i++) JXL_DEBUG_V(9, "%i ", pcol[i]); + x++; + } + } + std::vector wp_states; + for (size_t c = 0; c < nb; c++) { + wp_states.emplace_back(wp_header, w, h); + } + std::vector p_quant(nb); + // Three rows of error for dithering: y to y + 2. + // Each row has two pixels of padding in the ends, which is + // beneficial for both precision and encoding speed. + std::vector> error_row[3]; + if (lossy) { + for (int i = 0; i < 3; ++i) { + error_row[i].resize(nb); + for (size_t c = 0; c < nb; ++c) { + error_row[i][c].resize(w + 4); + } + } + } + for (size_t y = 0; y < h; y++) { + for (size_t c = 0; c < nb; c++) { + p_in[c] = input.channel[begin_c + c].Row(y); + if (lossy) p_quant[c] = quantized_input.channel[c].Row(y); + } + pixel_type *JXL_RESTRICT p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + int index; + if (!lossy) { + for (size_t c = 0; c < nb; c++) color[c] = p_in[c][x]; + // Exact search. + for (index = 0; static_cast(index) < nb_colors; index++) { + bool found = true; + for (size_t c = 0; c < nb; c++) { + if (color[c] != p_palette[c * onerow + index]) { + found = false; + break; + } + } + if (found) break; + } + if (index < static_cast(nb_deltas)) { + delta_used = true; + } + } else { + int best_index = 0; + bool best_is_delta = false; + float best_distance = std::numeric_limits::infinity(); + std::vector best_val(nb, 0); + std::vector ideal_residual(nb, 0); + std::vector quantized_val(nb); + std::vector predictions(nb); + static const double kDiffusionMultiplier[] = {0.55, 0.75}; + for (int diffusion_index = 0; diffusion_index < 2; ++diffusion_index) { + for (size_t c = 0; c < nb; c++) { + color_with_error[c] = + p_in[c][x] + palette_iteration_data.final_run * + kDiffusionMultiplier[diffusion_index] * + error_row[0][c][x + 2]; + color[c] = Clamp1(lroundf(color_with_error[c]), 0l, + (1l << input.bitdepth) - 1); + } + + for (size_t c = 0; c < nb; ++c) { + predictions[c] = PredictNoTreeWP(w, p_quant[c] + x, onerow_image, x, + y, predictor, &wp_states[c]) + .guess; + } + const auto TryIndex = [&](const int index) { + for (size_t c = 0; c < nb; c++) { + quantized_val[c] = palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/nb_colors, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + if (index < static_cast(nb_deltas)) { + quantized_val[c] += predictions[c]; + } + } + const float color_distance = + 32.0 / (1LL << std::max(0, 2 * (bit_depth - 8))) * + palette_internal::ColorDistance(color_with_error, + quantized_val); + float index_penalty = 0; + if (index == -1) { + index_penalty = -124; + } else if (index < 0) { + index_penalty = -2 * index; + } else if (index < static_cast(nb_deltas)) { + index_penalty = 250; + } else if (index < static_cast(nb_colors)) { + index_penalty = 150; + } else if (index < static_cast(nb_colors) + + palette_internal::kLargeCubeOffset) { + index_penalty = 70; + } else { + index_penalty = 256; + } + const float distance = color_distance + index_penalty; + if (distance < best_distance) { + best_distance = distance; + best_index = index; + best_is_delta = index < static_cast(nb_deltas); + best_val.swap(quantized_val); + for (size_t c = 0; c < nb; ++c) { + ideal_residual[c] = color_with_error[c] - predictions[c]; + } + } + }; + for (index = palette_internal::kMinImplicitPaletteIndex; + index < static_cast(nb_colors); index++) { + TryIndex(index); + } + TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex( + color, nb_colors, bit_depth, + /*high_quality=*/false)); + if (palette_internal::kEncodeToHighQualityImplicitPalette) { + TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex( + color, nb_colors, bit_depth, + /*high_quality=*/true)); + } + } + index = best_index; + delta_used |= best_is_delta; + if (!palette_iteration_data.final_run) { + for (size_t c = 0; c < 3; ++c) { + palette_iteration_data.deltas[c].push_back(ideal_residual[c]); + } + palette_iteration_data.delta_distances.push_back(best_distance); + } + + for (size_t c = 0; c < nb; ++c) { + wp_states[c].UpdateErrors(best_val[c], x, y, w); + p_quant[c][x] = best_val[c]; + } + float len_error = 0; + for (size_t c = 0; c < nb; ++c) { + float local_error = color_with_error[c] - best_val[c]; + len_error += local_error * local_error; + } + len_error = sqrt(len_error); + float modulate = 1.0; + int len_limit = 38 << std::max(0, bit_depth - 8); + if (len_error > len_limit) { + modulate *= len_limit / len_error; + } + for (size_t c = 0; c < nb; ++c) { + float total_error = (color_with_error[c] - best_val[c]); + + // If the neighboring pixels have some error in the opposite + // direction of total_error, cancel some or all of it out before + // spreading among them. + constexpr int offsets[12][2] = {{1, 2}, {0, 3}, {0, 4}, {1, 1}, + {1, 3}, {2, 2}, {1, 0}, {1, 4}, + {2, 1}, {2, 3}, {2, 0}, {2, 4}}; + float total_available = 0; + for (int i = 0; i < 11; ++i) { + const int row = offsets[i][0]; + const int col = offsets[i][1]; + if (std::signbit(error_row[row][c][x + col]) != + std::signbit(total_error)) { + total_available += error_row[row][c][x + col]; + } + } + float weight = + std::abs(total_error) / (std::abs(total_available) + 1e-3); + weight = std::min(weight, 1.0f); + for (int i = 0; i < 11; ++i) { + const int row = offsets[i][0]; + const int col = offsets[i][1]; + if (std::signbit(error_row[row][c][x + col]) != + std::signbit(total_error)) { + total_error += weight * error_row[row][c][x + col]; + error_row[row][c][x + col] *= (1 - weight); + } + } + total_error *= modulate; + const float remaining_error = (1.0f / 14.) * total_error; + error_row[0][c][x + 3] += 2 * remaining_error; + error_row[0][c][x + 4] += remaining_error; + error_row[1][c][x + 0] += remaining_error; + for (int i = 0; i < 5; ++i) { + error_row[1][c][x + i] += remaining_error; + error_row[2][c][x + i] += remaining_error; + } + } + } + if (palette_iteration_data.final_run) p[x] = index; + } + if (lossy) { + for (size_t c = 0; c < nb; ++c) { + error_row[0][c].swap(error_row[1][c]); + error_row[1][c].swap(error_row[2][c]); + std::fill(error_row[2][c].begin(), error_row[2][c].end(), 0.f); + } + } + } + if (!delta_used) { + predictor = Predictor::Zero; + } + if (palette_iteration_data.final_run) { + input.nb_meta_channels++; + input.channel.erase(input.channel.begin() + begin_c + 1, + input.channel.begin() + end_c + 1); + input.channel.insert(input.channel.begin(), std::move(pch)); + } + nb_colors -= nb_deltas; + return true; +} + +Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered, + bool lossy, Predictor &predictor, + const weighted::Header &wp_header) { + PaletteIterationData palette_iteration_data; + uint32_t nb_colors_orig = nb_colors; + uint32_t nb_deltas_orig = nb_deltas; + // preprocessing pass in case of lossy palette + if (lossy && input.bitdepth >= 8) { + JXL_RETURN_IF_ERROR(FwdPaletteIteration( + input, begin_c, end_c, nb_colors_orig, nb_deltas_orig, ordered, lossy, + predictor, wp_header, palette_iteration_data)); + } + palette_iteration_data.final_run = true; + return FwdPaletteIteration(input, begin_c, end_c, nb_colors, nb_deltas, + ordered, lossy, predictor, wp_header, + palette_iteration_data); +} + +} // namespace jxl diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h new file mode 100644 index 0000000000..0f3d66825b --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_ + +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered, + bool lossy, Predictor &predictor, + const weighted::Header &wp_header); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc new file mode 100644 index 0000000000..050563a3c2 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc @@ -0,0 +1,73 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/enc_rct.h" + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels + +namespace jxl { + +Status FwdRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { + JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2)); + if (rct_type == 0) { // noop + return false; + } + // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR + int permutation = rct_type / 7; + // 0-5 values have the low bit corresponding to Third and the high bits + // corresponding to Second. 6 corresponds to YCoCg. + // + // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird + // + // Third: 0=nop, 1=SubtractFirst + int custom = rct_type % 7; + size_t m = begin_c; + size_t w = input.channel[m + 0].w; + size_t h = input.channel[m + 0].h; + int second = (custom % 7) >> 1; + int third = (custom % 7) & 1; + const auto do_rct = [&](const int y, const int thread) { + const pixel_type* in0 = input.channel[m + (permutation % 3)].Row(y); + const pixel_type* in1 = + input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); + const pixel_type* in2 = + input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); + pixel_type* out0 = input.channel[m].Row(y); + pixel_type* out1 = input.channel[m + 1].Row(y); + pixel_type* out2 = input.channel[m + 2].Row(y); + if (custom == 6) { + for (size_t x = 0; x < w; x++) { + pixel_type R = in0[x]; + pixel_type G = in1[x]; + pixel_type B = in2[x]; + out1[x] = R - B; + pixel_type tmp = B + (out1[x] >> 1); + out2[x] = G - tmp; + out0[x] = tmp + (out2[x] >> 1); + } + } else { + for (size_t x = 0; x < w; x++) { + pixel_type First = in0[x]; + pixel_type Second = in1[x]; + pixel_type Third = in2[x]; + if (second == 1) { + Second = Second - First; + } else if (second == 2) { + Second = Second - ((First + Third) >> 1); + } + if (third) Third = Third - First; + out0[x] = First; + out1[x] = Second; + out2[x] = Third; + } + } + }; + return RunOnPool(pool, 0, h, ThreadPool::NoInit, do_rct, "FwdRCT"); +} + +} // namespace jxl diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h new file mode 100644 index 0000000000..cb5a193c8d --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_ + +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +Status FwdRCT(Image &input, size_t begin_c, size_t rct_type, ThreadPool *pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc new file mode 100644 index 0000000000..dfd90cde68 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc @@ -0,0 +1,141 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/enc_squeeze.h" + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/squeeze.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +void FwdHSqueeze(Image &input, int c, int rc) { + const Channel &chin = input.channel[c]; + + JXL_DEBUG_V(4, "Doing horizontal squeeze of channel %i to new channel %i", c, + rc); + + Channel chout((chin.w + 1) / 2, chin.h, chin.hshift + 1, chin.vshift); + Channel chout_residual(chin.w - chout.w, chout.h, chin.hshift + 1, + chin.vshift); + + for (size_t y = 0; y < chout.h; y++) { + const pixel_type *JXL_RESTRICT p_in = chin.Row(y); + pixel_type *JXL_RESTRICT p_out = chout.Row(y); + pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y); + for (size_t x = 0; x < chout_residual.w; x++) { + pixel_type A = p_in[x * 2]; + pixel_type B = p_in[x * 2 + 1]; + pixel_type avg = (A + B + (A > B)) >> 1; + p_out[x] = avg; + + pixel_type diff = A - B; + + pixel_type next_avg = avg; + if (x + 1 < chout_residual.w) { + next_avg = (p_in[x * 2 + 2] + p_in[x * 2 + 3] + + (p_in[x * 2 + 2] > p_in[x * 2 + 3])) >> + 1; // which will be chout.value(y,x+1) + } else if (chin.w & 1) + next_avg = p_in[x * 2 + 2]; + pixel_type left = (x > 0 ? p_in[x * 2 - 1] : avg); + pixel_type tendency = SmoothTendency(left, avg, next_avg); + + p_res[x] = diff - tendency; + } + if (chin.w & 1) { + int x = chout.w - 1; + p_out[x] = p_in[x * 2]; + } + } + input.channel[c] = std::move(chout); + input.channel.insert(input.channel.begin() + rc, std::move(chout_residual)); +} + +void FwdVSqueeze(Image &input, int c, int rc) { + const Channel &chin = input.channel[c]; + + JXL_DEBUG_V(4, "Doing vertical squeeze of channel %i to new channel %i", c, + rc); + + Channel chout(chin.w, (chin.h + 1) / 2, chin.hshift, chin.vshift + 1); + Channel chout_residual(chin.w, chin.h - chout.h, chin.hshift, + chin.vshift + 1); + intptr_t onerow_in = chin.plane.PixelsPerRow(); + for (size_t y = 0; y < chout_residual.h; y++) { + const pixel_type *JXL_RESTRICT p_in = chin.Row(y * 2); + pixel_type *JXL_RESTRICT p_out = chout.Row(y); + pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y); + for (size_t x = 0; x < chout.w; x++) { + pixel_type A = p_in[x]; + pixel_type B = p_in[x + onerow_in]; + pixel_type avg = (A + B + (A > B)) >> 1; + p_out[x] = avg; + + pixel_type diff = A - B; + + pixel_type next_avg = avg; + if (y + 1 < chout_residual.h) { + next_avg = (p_in[x + 2 * onerow_in] + p_in[x + 3 * onerow_in] + + (p_in[x + 2 * onerow_in] > p_in[x + 3 * onerow_in])) >> + 1; // which will be chout.value(y+1,x) + } else if (chin.h & 1) { + next_avg = p_in[x + 2 * onerow_in]; + } + pixel_type top = + (y > 0 ? p_in[static_cast(x) - onerow_in] : avg); + pixel_type tendency = SmoothTendency(top, avg, next_avg); + + p_res[x] = diff - tendency; + } + } + if (chin.h & 1) { + size_t y = chout.h - 1; + const pixel_type *p_in = chin.Row(y * 2); + pixel_type *p_out = chout.Row(y); + for (size_t x = 0; x < chout.w; x++) { + p_out[x] = p_in[x]; + } + } + input.channel[c] = std::move(chout); + input.channel.insert(input.channel.begin() + rc, std::move(chout_residual)); +} + +Status FwdSqueeze(Image &input, std::vector parameters, + ThreadPool *pool) { + if (parameters.empty()) { + DefaultSqueezeParameters(¶meters, input); + } + // if nothing to do, don't do squeeze + if (parameters.empty()) return false; + for (size_t i = 0; i < parameters.size(); i++) { + JXL_RETURN_IF_ERROR( + CheckMetaSqueezeParams(parameters[i], input.channel.size())); + bool horizontal = parameters[i].horizontal; + bool in_place = parameters[i].in_place; + uint32_t beginc = parameters[i].begin_c; + uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1; + uint32_t offset; + if (in_place) { + offset = endc + 1; + } else { + offset = input.channel.size(); + } + for (uint32_t c = beginc; c <= endc; c++) { + if (horizontal) { + FwdHSqueeze(input, c, offset + c - beginc); + } else { + FwdVSqueeze(input, c, offset + c - beginc); + } + } + } + return true; +} + +} // namespace jxl diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h new file mode 100644 index 0000000000..39b001017b --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_ + +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +Status FwdSqueeze(Image &input, std::vector parameters, + ThreadPool *pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc new file mode 100644 index 0000000000..bdaaf9f87e --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc @@ -0,0 +1,46 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/enc_transform.h" + +#include "lib/jxl/modular/transform/enc_palette.h" +#include "lib/jxl/modular/transform/enc_rct.h" +#include "lib/jxl/modular/transform/enc_squeeze.h" + +namespace jxl { + +Status TransformForward(Transform &t, Image &input, + const weighted::Header &wp_header, ThreadPool *pool) { + switch (t.id) { + case TransformId::kRCT: + return FwdRCT(input, t.begin_c, t.rct_type, pool); + case TransformId::kSqueeze: + return FwdSqueeze(input, t.squeezes, pool); + case TransformId::kPalette: + return FwdPalette(input, t.begin_c, t.begin_c + t.num_c - 1, t.nb_colors, + t.nb_deltas, t.ordered_palette, t.lossy_palette, + t.predictor, wp_header); + default: + return JXL_FAILURE("Unknown transformation (ID=%u)", + static_cast(t.id)); + } +} + +void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max) { + pixel_type realmin = std::numeric_limits::max(); + pixel_type realmax = std::numeric_limits::min(); + for (size_t y = 0; y < ch.h; y++) { + const pixel_type *JXL_RESTRICT p = ch.Row(y); + for (size_t x = 0; x < ch.w; x++) { + if (p[x] < realmin) realmin = p[x]; + if (p[x] > realmax) realmax = p[x]; + } + } + + if (min) *min = realmin; + if (max) *max = realmax; +} + +} // namespace jxl diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h new file mode 100644 index 0000000000..07659e1b0a --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_ + +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +Status TransformForward(Transform &t, Image &input, + const weighted::Header &wp_header, ThreadPool *pool); + +void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc new file mode 100644 index 0000000000..46129f19f0 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc @@ -0,0 +1,176 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/palette.h" + +namespace jxl { + +Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors, + uint32_t nb_deltas, Predictor predictor, + const weighted::Header &wp_header, ThreadPool *pool) { + if (input.nb_meta_channels < 1) { + return JXL_FAILURE("Error: Palette transform without palette."); + } + std::atomic num_errors{0}; + int nb = input.channel[0].h; + uint32_t c0 = begin_c + 1; + if (c0 >= input.channel.size()) { + return JXL_FAILURE("Channel is out of range."); + } + size_t w = input.channel[c0].w; + size_t h = input.channel[c0].h; + if (nb < 1) return JXL_FAILURE("Corrupted transforms"); + for (int i = 1; i < nb; i++) { + input.channel.insert( + input.channel.begin() + c0 + 1, + Channel(w, h, input.channel[c0].hshift, input.channel[c0].vshift)); + } + const Channel &palette = input.channel[0]; + const pixel_type *JXL_RESTRICT p_palette = input.channel[0].Row(0); + intptr_t onerow = input.channel[0].plane.PixelsPerRow(); + intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow(); + const int bit_depth = std::min(input.bitdepth, 24); + + if (w == 0) { + // Nothing to do. + // Avoid touching "empty" channels with non-zero height. + } else if (nb_deltas == 0 && predictor == Predictor::Zero) { + if (nb == 1) { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, h, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + pixel_type *p = input.channel[c0].Row(y); + for (size_t x = 0; x < w; x++) { + const int index = Clamp1(p[x], 0, (pixel_type)palette.w - 1); + p[x] = palette_internal::GetPaletteValue( + p_palette, index, /*c=*/0, + /*palette_size=*/palette.w, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + } + }, + "UndoChannelPalette")); + } else { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, h, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + std::vector p_out(nb); + const pixel_type *p_index = input.channel[c0].Row(y); + for (int c = 0; c < nb; c++) + p_out[c] = input.channel[c0 + c].Row(y); + for (size_t x = 0; x < w; x++) { + const int index = p_index[x]; + for (int c = 0; c < nb; c++) { + p_out[c][x] = palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/palette.w, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + } + } + }, + "UndoPalette")); + } + } else { + // Parallelized per channel. + ImageI indices = CopyImage(input.channel[c0].plane); + if (predictor == Predictor::Weighted) { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, nb, ThreadPool::NoInit, + [&](const uint32_t c, size_t /* thread */) { + Channel &channel = input.channel[c0 + c]; + weighted::State wp_state(wp_header, channel.w, channel.h); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT p = channel.Row(y); + const pixel_type *JXL_RESTRICT idx = indices.Row(y); + for (size_t x = 0; x < channel.w; x++) { + int index = idx[x]; + pixel_type_w val = 0; + const pixel_type palette_entry = + palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/palette.w, /*onerow=*/onerow, + /*bit_depth=*/bit_depth); + if (index < static_cast(nb_deltas)) { + PredictionResult pred = + PredictNoTreeWP(channel.w, p + x, onerow_image, x, y, + predictor, &wp_state); + val = pred.guess + palette_entry; + } else { + val = palette_entry; + } + p[x] = val; + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + } + }, + "UndoDeltaPaletteWP")); + } else { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, nb, ThreadPool::NoInit, + [&](const uint32_t c, size_t /* thread */) { + Channel &channel = input.channel[c0 + c]; + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT p = channel.Row(y); + const pixel_type *JXL_RESTRICT idx = indices.Row(y); + for (size_t x = 0; x < channel.w; x++) { + int index = idx[x]; + pixel_type_w val = 0; + const pixel_type palette_entry = + palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/palette.w, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + if (index < static_cast(nb_deltas)) { + PredictionResult pred = PredictNoTreeNoWP( + channel.w, p + x, onerow_image, x, y, predictor); + val = pred.guess + palette_entry; + } else { + val = palette_entry; + } + p[x] = val; + } + } + }, + "UndoDeltaPaletteNoWP")); + } + } + if (c0 >= input.nb_meta_channels) { + // Palette was done on normal channels + input.nb_meta_channels--; + } else { + // Palette was done on metachannels + JXL_ASSERT(static_cast(input.nb_meta_channels) >= 2 - nb); + input.nb_meta_channels -= 2 - nb; + JXL_ASSERT(begin_c + nb - 1 < input.nb_meta_channels); + } + input.channel.erase(input.channel.begin(), input.channel.begin() + 1); + return num_errors.load(std::memory_order_relaxed) == 0; +} + +Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t nb_colors, uint32_t nb_deltas, bool lossy) { + JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c)); + + size_t nb = end_c - begin_c + 1; + if (begin_c >= input.nb_meta_channels) { + // Palette was done on normal channels + input.nb_meta_channels++; + } else { + // Palette was done on metachannels + JXL_ASSERT(end_c < input.nb_meta_channels); + // we remove nb-1 metachannels and add one + input.nb_meta_channels += 2 - nb; + } + input.channel.erase(input.channel.begin() + begin_c + 1, + input.channel.begin() + end_c + 1); + Channel pch(nb_colors + nb_deltas, nb); + pch.hshift = -1; + pch.vshift = -1; + input.channel.insert(input.channel.begin(), std::move(pch)); + return true; +} + +} // namespace jxl diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h new file mode 100644 index 0000000000..cc0f67960b --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h @@ -0,0 +1,129 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_ + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels + +namespace jxl { + +namespace palette_internal { + +static constexpr int kMaxPaletteLookupTableSize = 1 << 16; + +static constexpr int kRgbChannels = 3; + +// 5x5x5 color cube for the larger cube. +static constexpr int kLargeCube = 5; + +// Smaller interleaved color cube to fill the holes of the larger cube. +static constexpr int kSmallCube = 4; +static constexpr int kSmallCubeBits = 2; +// kSmallCube ** 3 +static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube; + +static inline pixel_type Scale(uint64_t value, uint64_t bit_depth, + uint64_t denom) { + // return (value * ((static_cast(1) << bit_depth) - 1)) / denom; + // We only call this function with kSmallCube or kLargeCube - 1 as denom, + // allowing us to avoid a division here. + JXL_ASSERT(denom == 4); + return (value * ((static_cast(1) << bit_depth) - 1)) >> 2; +} + +// The purpose of this function is solely to extend the interpretation of +// palette indices to implicit values. If index < nb_deltas, indicating that the +// result is a delta palette entry, it is the responsibility of the caller to +// treat it as such. +static JXL_MAYBE_UNUSED pixel_type +GetPaletteValue(const pixel_type *const palette, int index, const size_t c, + const int palette_size, const int onerow, const int bit_depth) { + if (index < 0) { + static constexpr std::array, 72> kDeltaPalette = { + { + {{0, 0, 0}}, {{4, 4, 4}}, {{11, 0, 0}}, + {{0, 0, -13}}, {{0, -12, 0}}, {{-10, -10, -10}}, + {{-18, -18, -18}}, {{-27, -27, -27}}, {{-18, -18, 0}}, + {{0, 0, -32}}, {{-32, 0, 0}}, {{-37, -37, -37}}, + {{0, -32, -32}}, {{24, 24, 45}}, {{50, 50, 50}}, + {{-45, -24, -24}}, {{-24, -45, -45}}, {{0, -24, -24}}, + {{-34, -34, 0}}, {{-24, 0, -24}}, {{-45, -45, -24}}, + {{64, 64, 64}}, {{-32, 0, -32}}, {{0, -32, 0}}, + {{-32, 0, 32}}, {{-24, -45, -24}}, {{45, 24, 45}}, + {{24, -24, -45}}, {{-45, -24, 24}}, {{80, 80, 80}}, + {{64, 0, 0}}, {{0, 0, -64}}, {{0, -64, -64}}, + {{-24, -24, 45}}, {{96, 96, 96}}, {{64, 64, 0}}, + {{45, -24, -24}}, {{34, -34, 0}}, {{112, 112, 112}}, + {{24, -45, -45}}, {{45, 45, -24}}, {{0, -32, 32}}, + {{24, -24, 45}}, {{0, 96, 96}}, {{45, -24, 24}}, + {{24, -45, -24}}, {{-24, -45, 24}}, {{0, -64, 0}}, + {{96, 0, 0}}, {{128, 128, 128}}, {{64, 0, 64}}, + {{144, 144, 144}}, {{96, 96, 0}}, {{-36, -36, 36}}, + {{45, -24, -45}}, {{45, -45, -24}}, {{0, 0, -96}}, + {{0, 128, 128}}, {{0, 96, 0}}, {{45, 24, -45}}, + {{-128, 0, 0}}, {{24, -45, 24}}, {{-45, 24, -45}}, + {{64, 0, -64}}, {{64, -64, -64}}, {{96, 0, 96}}, + {{45, -45, 24}}, {{24, 45, -45}}, {{64, 64, -64}}, + {{128, 128, 0}}, {{0, 0, -128}}, {{-24, 45, -45}}, + }}; + if (c >= kRgbChannels) { + return 0; + } + // Do not open the brackets, otherwise INT32_MIN negation could overflow. + index = -(index + 1); + index %= 1 + 2 * (kDeltaPalette.size() - 1); + static constexpr int kMultiplier[] = {-1, 1}; + pixel_type result = + kDeltaPalette[((index + 1) >> 1)][c] * kMultiplier[index & 1]; + if (bit_depth > 8) { + result *= static_cast(1) << (bit_depth - 8); + } + return result; + } else if (palette_size <= index && index < palette_size + kLargeCubeOffset) { + if (c >= kRgbChannels) return 0; + index -= palette_size; + index >>= c * kSmallCubeBits; + return Scale(index % kSmallCube, bit_depth, kSmallCube) + + (1 << (std::max(0, bit_depth - 3))); + } else if (palette_size + kLargeCubeOffset <= index) { + if (c >= kRgbChannels) return 0; + index -= palette_size + kLargeCubeOffset; + // TODO(eustas): should we take care of ambiguity created by + // index >= kLargeCube ** 3 ? + switch (c) { + case 0: + break; + case 1: + index /= kLargeCube; + break; + case 2: + index /= kLargeCube * kLargeCube; + break; + } + return Scale(index % kLargeCube, bit_depth, kLargeCube - 1); + } + return palette[c * onerow + static_cast(index)]; +} + +} // namespace palette_internal + +Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors, + uint32_t nb_deltas, Predictor predictor, + const weighted::Header &wp_header, ThreadPool *pool); + +Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t nb_colors, uint32_t nb_deltas, bool lossy); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc new file mode 100644 index 0000000000..f3002a5ac3 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc @@ -0,0 +1,153 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/rct.h" +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc" +#include +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; + +template +void InvRCTRow(const pixel_type* in0, const pixel_type* in1, + const pixel_type* in2, pixel_type* out0, pixel_type* out1, + pixel_type* out2, size_t w) { + static_assert(transform_type >= 0 && transform_type < 7, + "Invalid transform type"); + int second = transform_type >> 1; + int third = transform_type & 1; + + size_t x = 0; + const HWY_FULL(pixel_type) d; + const size_t N = Lanes(d); + for (; x + N - 1 < w; x += N) { + if (transform_type == 6) { + auto Y = Load(d, in0 + x); + auto Co = Load(d, in1 + x); + auto Cg = Load(d, in2 + x); + Y = Sub(Y, ShiftRight<1>(Cg)); + auto G = Add(Cg, Y); + Y = Sub(Y, ShiftRight<1>(Co)); + auto R = Add(Y, Co); + Store(R, d, out0 + x); + Store(G, d, out1 + x); + Store(Y, d, out2 + x); + } else { + auto First = Load(d, in0 + x); + auto Second = Load(d, in1 + x); + auto Third = Load(d, in2 + x); + if (third) Third = Add(Third, First); + if (second == 1) { + Second = Add(Second, First); + } else if (second == 2) { + Second = Add(Second, ShiftRight<1>(Add(First, Third))); + } + Store(First, d, out0 + x); + Store(Second, d, out1 + x); + Store(Third, d, out2 + x); + } + } + for (; x < w; x++) { + if (transform_type == 6) { + pixel_type Y = in0[x]; + pixel_type Co = in1[x]; + pixel_type Cg = in2[x]; + pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); + pixel_type G = PixelAdd(Cg, tmp); + pixel_type B = PixelAdd(tmp, -(Co >> 1)); + pixel_type R = PixelAdd(B, Co); + out0[x] = R; + out1[x] = G; + out2[x] = B; + } else { + pixel_type First = in0[x]; + pixel_type Second = in1[x]; + pixel_type Third = in2[x]; + if (third) Third = PixelAdd(Third, First); + if (second == 1) { + Second = PixelAdd(Second, First); + } else if (second == 2) { + Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); + } + out0[x] = First; + out1[x] = Second; + out2[x] = Third; + } + } +} + +Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { + JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2)); + size_t m = begin_c; + Channel& c0 = input.channel[m + 0]; + size_t w = c0.w; + size_t h = c0.h; + if (rct_type == 0) { // noop + return true; + } + // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR + int permutation = rct_type / 7; + JXL_CHECK(permutation < 6); + // 0-5 values have the low bit corresponding to Third and the high bits + // corresponding to Second. 6 corresponds to YCoCg. + // + // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird + // + // Third: 0=nop, 1=SubtractFirst + int custom = rct_type % 7; + // Special case: permute-only. Swap channels around. + if (custom == 0) { + Channel ch0 = std::move(input.channel[m]); + Channel ch1 = std::move(input.channel[m + 1]); + Channel ch2 = std::move(input.channel[m + 2]); + input.channel[m + (permutation % 3)] = std::move(ch0); + input.channel[m + ((permutation + 1 + permutation / 3) % 3)] = + std::move(ch1); + input.channel[m + ((permutation + 2 - permutation / 3) % 3)] = + std::move(ch2); + return true; + } + constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = { + InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>, + InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, h, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + const pixel_type* in0 = input.channel[m].Row(y); + const pixel_type* in1 = input.channel[m + 1].Row(y); + const pixel_type* in2 = input.channel[m + 2].Row(y); + pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y); + pixel_type* out1 = + input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); + pixel_type* out2 = + input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); + inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w); + }, + "InvRCT")); + return true; +} + +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(InvRCT); +Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { + return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool); +} + +} // namespace jxl +#endif diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h new file mode 100644 index 0000000000..aef65621d5 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_RCT_H_ +#define LIB_JXL_MODULAR_TRANSFORM_RCT_H_ + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels + +namespace jxl { + +Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_RCT_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc new file mode 100644 index 0000000000..8440d9e804 --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc @@ -0,0 +1,478 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/squeeze.h" + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/squeeze.cc" +#include +#include + +#include "lib/jxl/simd_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::IfThenZeroElse; +using hwy::HWY_NAMESPACE::Lt; +using hwy::HWY_NAMESPACE::MulEven; +using hwy::HWY_NAMESPACE::Ne; +using hwy::HWY_NAMESPACE::Neg; +using hwy::HWY_NAMESPACE::OddEven; +using hwy::HWY_NAMESPACE::RebindToUnsigned; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Xor; + +#if HWY_TARGET != HWY_SCALAR + +JXL_INLINE void FastUnsqueeze(const pixel_type *JXL_RESTRICT p_residual, + const pixel_type *JXL_RESTRICT p_avg, + const pixel_type *JXL_RESTRICT p_navg, + const pixel_type *p_pout, + pixel_type *JXL_RESTRICT p_out, + pixel_type *p_nout) { + const HWY_CAPPED(pixel_type, 8) d; + const RebindToUnsigned du; + const size_t N = Lanes(d); + auto onethird = Set(d, 0x55555556); + for (size_t x = 0; x < 8; x += N) { + auto avg = Load(d, p_avg + x); + auto next_avg = Load(d, p_navg + x); + auto top = Load(d, p_pout + x); + // Equivalent to SmoothTendency(top,avg,next_avg), but without branches + auto Ba = Sub(top, avg); + auto an = Sub(avg, next_avg); + auto nonmono = Xor(Ba, an); + auto absBa = Abs(Ba); + auto absan = Abs(an); + auto absBn = Abs(Sub(top, next_avg)); + // Compute a3 = absBa / 3 + auto a3e = BitCast(d, ShiftRight<32>(MulEven(absBa, onethird))); + auto a3oi = MulEven(Reverse(d, absBa), onethird); + auto a3o = BitCast( + d, Reverse(hwy::HWY_NAMESPACE::Repartition(), + a3oi)); + auto a3 = OddEven(a3o, a3e); + a3 = Add(a3, Add(absBn, Set(d, 2))); + auto absdiff = ShiftRight<2>(a3); + auto skipdiff = Ne(Ba, Zero(d)); + skipdiff = And(skipdiff, Ne(an, Zero(d))); + skipdiff = And(skipdiff, Lt(nonmono, Zero(d))); + auto absBa2 = Add(ShiftLeft<1>(absBa), And(absdiff, Set(d, 1))); + absdiff = IfThenElse(Gt(absdiff, absBa2), + Add(ShiftLeft<1>(absBa), Set(d, 1)), absdiff); + auto absan2 = ShiftLeft<1>(absan); + absdiff = IfThenElse(Gt(Add(absdiff, And(absdiff, Set(d, 1))), absan2), + absan2, absdiff); + auto diff1 = IfThenElse(Lt(top, next_avg), Neg(absdiff), absdiff); + auto tendency = IfThenZeroElse(skipdiff, diff1); + + auto diff_minus_tendency = Load(d, p_residual + x); + auto diff = Add(diff_minus_tendency, tendency); + auto out = + Add(avg, ShiftRight<1>( + Add(diff, BitCast(d, ShiftRight<31>(BitCast(du, diff)))))); + Store(out, d, p_out + x); + Store(Sub(out, diff), d, p_nout + x); + } +} + +#endif + +Status InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) { + JXL_ASSERT(c < input.channel.size()); + JXL_ASSERT(rc < input.channel.size()); + Channel &chin = input.channel[c]; + const Channel &chin_residual = input.channel[rc]; + // These must be valid since we ran MetaApply already. + JXL_ASSERT(chin.w == DivCeil(chin.w + chin_residual.w, 2)); + JXL_ASSERT(chin.h == chin_residual.h); + + if (chin_residual.w == 0) { + // Short-circuit: output channel has same dimensions as input. + input.channel[c].hshift--; + return true; + } + + // Note: chin.w >= chin_residual.w and at most 1 different. + Channel chout(chin.w + chin_residual.w, chin.h, chin.hshift - 1, chin.vshift); + JXL_DEBUG_V(4, + "Undoing horizontal squeeze of channel %i using residuals in " + "channel %i (going from width %" PRIuS " to %" PRIuS ")", + c, rc, chin.w, chout.w); + + if (chin_residual.h == 0) { + // Short-circuit: channel with no pixels. + input.channel[c] = std::move(chout); + return true; + } + auto unsqueeze_row = [&](size_t y, size_t x0) { + const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y); + const pixel_type *JXL_RESTRICT p_avg = chin.Row(y); + pixel_type *JXL_RESTRICT p_out = chout.Row(y); + for (size_t x = x0; x < chin_residual.w; x++) { + pixel_type_w diff_minus_tendency = p_residual[x]; + pixel_type_w avg = p_avg[x]; + pixel_type_w next_avg = (x + 1 < chin.w ? p_avg[x + 1] : avg); + pixel_type_w left = (x ? p_out[(x << 1) - 1] : avg); + pixel_type_w tendency = SmoothTendency(left, avg, next_avg); + pixel_type_w diff = diff_minus_tendency + tendency; + pixel_type_w A = avg + (diff / 2); + p_out[(x << 1)] = A; + pixel_type_w B = A - diff; + p_out[(x << 1) + 1] = B; + } + if (chout.w & 1) p_out[chout.w - 1] = p_avg[chin.w - 1]; + }; + + // somewhat complicated trickery just to be able to SIMD this. + // Horizontal unsqueeze has horizontal data dependencies, so we do + // 8 rows at a time and treat it as a vertical unsqueeze of a + // transposed 8x8 block (or 9x8 for one input). + static constexpr const size_t kRowsPerThread = 8; + const auto unsqueeze_span = [&](const uint32_t task, size_t /* thread */) { + const size_t y0 = task * kRowsPerThread; + const size_t rows = std::min(kRowsPerThread, chin.h - y0); + size_t x = 0; + +#if HWY_TARGET != HWY_SCALAR + intptr_t onerow_in = chin.plane.PixelsPerRow(); + intptr_t onerow_inr = chin_residual.plane.PixelsPerRow(); + intptr_t onerow_out = chout.plane.PixelsPerRow(); + const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y0); + const pixel_type *JXL_RESTRICT p_avg = chin.Row(y0); + pixel_type *JXL_RESTRICT p_out = chout.Row(y0); + HWY_ALIGN pixel_type b_p_avg[9 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_residual[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_even[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_odd[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_evenT[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_oddT[8 * kRowsPerThread]; + const HWY_CAPPED(pixel_type, 8) d; + const size_t N = Lanes(d); + if (chin_residual.w > 16 && rows == kRowsPerThread) { + for (; x < chin_residual.w - 9; x += 8) { + Transpose8x8Block(p_residual + x, b_p_residual, onerow_inr); + Transpose8x8Block(p_avg + x, b_p_avg, onerow_in); + for (size_t y = 0; y < kRowsPerThread; y++) { + b_p_avg[8 * 8 + y] = p_avg[x + 8 + onerow_in * y]; + } + for (size_t i = 0; i < 8; i++) { + FastUnsqueeze( + b_p_residual + 8 * i, b_p_avg + 8 * i, b_p_avg + 8 * (i + 1), + (x + i ? b_p_out_odd + 8 * ((x + i - 1) & 7) : b_p_avg + 8 * i), + b_p_out_even + 8 * i, b_p_out_odd + 8 * i); + } + + Transpose8x8Block(b_p_out_even, b_p_out_evenT, 8); + Transpose8x8Block(b_p_out_odd, b_p_out_oddT, 8); + for (size_t y = 0; y < kRowsPerThread; y++) { + for (size_t i = 0; i < kRowsPerThread; i += N) { + auto even = Load(d, b_p_out_evenT + 8 * y + i); + auto odd = Load(d, b_p_out_oddT + 8 * y + i); + StoreInterleaved(d, even, odd, + p_out + ((x + i) << 1) + onerow_out * y); + } + } + } + } +#endif + for (size_t y = 0; y < rows; y++) { + unsqueeze_row(y0 + y, x); + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.h, kRowsPerThread), + ThreadPool::NoInit, unsqueeze_span, + "InvHorizontalSqueeze")); + input.channel[c] = std::move(chout); + return true; +} + +Status InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) { + JXL_ASSERT(c < input.channel.size()); + JXL_ASSERT(rc < input.channel.size()); + const Channel &chin = input.channel[c]; + const Channel &chin_residual = input.channel[rc]; + // These must be valid since we ran MetaApply already. + JXL_ASSERT(chin.h == DivCeil(chin.h + chin_residual.h, 2)); + JXL_ASSERT(chin.w == chin_residual.w); + + if (chin_residual.h == 0) { + // Short-circuit: output channel has same dimensions as input. + input.channel[c].vshift--; + return true; + } + + // Note: chin.h >= chin_residual.h and at most 1 different. + Channel chout(chin.w, chin.h + chin_residual.h, chin.hshift, chin.vshift - 1); + JXL_DEBUG_V( + 4, + "Undoing vertical squeeze of channel %i using residuals in channel " + "%i (going from height %" PRIuS " to %" PRIuS ")", + c, rc, chin.h, chout.h); + + if (chin_residual.w == 0) { + // Short-circuit: channel with no pixels. + input.channel[c] = std::move(chout); + return true; + } + + static constexpr const int kColsPerThread = 64; + const auto unsqueeze_slice = [&](const uint32_t task, size_t /* thread */) { + const size_t x0 = task * kColsPerThread; + const size_t x1 = std::min((size_t)(task + 1) * kColsPerThread, chin.w); + const size_t w = x1 - x0; + // We only iterate up to std::min(chin_residual.h, chin.h) which is + // always chin_residual.h. + for (size_t y = 0; y < chin_residual.h; y++) { + const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y) + x0; + const pixel_type *JXL_RESTRICT p_avg = chin.Row(y) + x0; + const pixel_type *JXL_RESTRICT p_navg = + chin.Row(y + 1 < chin.h ? y + 1 : y) + x0; + pixel_type *JXL_RESTRICT p_out = chout.Row(y << 1) + x0; + pixel_type *JXL_RESTRICT p_nout = chout.Row((y << 1) + 1) + x0; + const pixel_type *p_pout = y > 0 ? chout.Row((y << 1) - 1) + x0 : p_avg; + size_t x = 0; +#if HWY_TARGET != HWY_SCALAR + for (; x + 7 < w; x += 8) { + FastUnsqueeze(p_residual + x, p_avg + x, p_navg + x, p_pout + x, + p_out + x, p_nout + x); + } +#endif + for (; x < w; x++) { + pixel_type_w avg = p_avg[x]; + pixel_type_w next_avg = p_navg[x]; + pixel_type_w top = p_pout[x]; + pixel_type_w tendency = SmoothTendency(top, avg, next_avg); + pixel_type_w diff_minus_tendency = p_residual[x]; + pixel_type_w diff = diff_minus_tendency + tendency; + pixel_type_w out = avg + (diff / 2); + p_out[x] = out; + // If the chin_residual.h == chin.h, the output has an even number + // of rows so the next line is fine. Otherwise, this loop won't + // write to the last output row which is handled separately. + p_nout[x] = out - diff; + } + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.w, kColsPerThread), + ThreadPool::NoInit, unsqueeze_slice, + "InvVertSqueeze")); + + if (chout.h & 1) { + size_t y = chin.h - 1; + const pixel_type *p_avg = chin.Row(y); + pixel_type *p_out = chout.Row(y << 1); + for (size_t x = 0; x < chin.w; x++) { + p_out[x] = p_avg[x]; + } + } + input.channel[c] = std::move(chout); + return true; +} + +Status InvSqueeze(Image &input, std::vector parameters, + ThreadPool *pool) { + for (int i = parameters.size() - 1; i >= 0; i--) { + JXL_RETURN_IF_ERROR( + CheckMetaSqueezeParams(parameters[i], input.channel.size())); + bool horizontal = parameters[i].horizontal; + bool in_place = parameters[i].in_place; + uint32_t beginc = parameters[i].begin_c; + uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1; + uint32_t offset; + if (in_place) { + offset = endc + 1; + } else { + offset = input.channel.size() + beginc - endc - 1; + } + if (beginc < input.nb_meta_channels) { + // This is checked in MetaSqueeze. + JXL_ASSERT(input.nb_meta_channels > parameters[i].num_c); + input.nb_meta_channels -= parameters[i].num_c; + } + + for (uint32_t c = beginc; c <= endc; c++) { + uint32_t rc = offset + c - beginc; + // MetaApply should imply that `rc` is within range, otherwise there's a + // programming bug. + JXL_ASSERT(rc < input.channel.size()); + if ((input.channel[c].w < input.channel[rc].w) || + (input.channel[c].h < input.channel[rc].h)) { + return JXL_FAILURE("Corrupted squeeze transform"); + } + if (horizontal) { + JXL_RETURN_IF_ERROR(InvHSqueeze(input, c, rc, pool)); + } else { + JXL_RETURN_IF_ERROR(InvVSqueeze(input, c, rc, pool)); + } + } + input.channel.erase(input.channel.begin() + offset, + input.channel.begin() + offset + (endc - beginc + 1)); + } + return true; +} + +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jxl { + +HWY_EXPORT(InvSqueeze); +Status InvSqueeze(Image &input, std::vector parameters, + ThreadPool *pool) { + return HWY_DYNAMIC_DISPATCH(InvSqueeze)(input, parameters, pool); +} + +void DefaultSqueezeParameters(std::vector *parameters, + const Image &image) { + int nb_channels = image.channel.size() - image.nb_meta_channels; + + parameters->clear(); + size_t w = image.channel[image.nb_meta_channels].w; + size_t h = image.channel[image.nb_meta_channels].h; + JXL_DEBUG_V( + 7, "Default squeeze parameters for %" PRIuS "x%" PRIuS " image: ", w, h); + + // do horizontal first on wide images; vertical first on tall images + bool wide = (w > h); + + if (nb_channels > 2 && image.channel[image.nb_meta_channels + 1].w == w && + image.channel[image.nb_meta_channels + 1].h == h) { + // assume channels 1 and 2 are chroma, and can be squeezed first for 4:2:0 + // previews + JXL_DEBUG_V(7, "(4:2:0 chroma), %" PRIuS "x%" PRIuS " image", w, h); + SqueezeParams params; + // horizontal chroma squeeze + params.horizontal = true; + params.in_place = false; + params.begin_c = image.nb_meta_channels + 1; + params.num_c = 2; + parameters->push_back(params); + params.horizontal = false; + // vertical chroma squeeze + parameters->push_back(params); + } + SqueezeParams params; + params.begin_c = image.nb_meta_channels; + params.num_c = nb_channels; + params.in_place = true; + + if (!wide) { + if (h > JXL_MAX_FIRST_PREVIEW_SIZE) { + params.horizontal = false; + parameters->push_back(params); + h = (h + 1) / 2; + JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h); + } + } + while (w > JXL_MAX_FIRST_PREVIEW_SIZE || h > JXL_MAX_FIRST_PREVIEW_SIZE) { + if (w > JXL_MAX_FIRST_PREVIEW_SIZE) { + params.horizontal = true; + parameters->push_back(params); + w = (w + 1) / 2; + JXL_DEBUG_V(7, "Horizontal (%" PRIuS "x%" PRIuS "), ", w, h); + } + if (h > JXL_MAX_FIRST_PREVIEW_SIZE) { + params.horizontal = false; + parameters->push_back(params); + h = (h + 1) / 2; + JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h); + } + } + JXL_DEBUG_V(7, "that's it"); +} + +Status CheckMetaSqueezeParams(const SqueezeParams ¶meter, + int num_channels) { + int c1 = parameter.begin_c; + int c2 = parameter.begin_c + parameter.num_c - 1; + if (c1 < 0 || c1 >= num_channels || c2 < 0 || c2 >= num_channels || c2 < c1) { + return JXL_FAILURE("Invalid channel range"); + } + return true; +} + +Status MetaSqueeze(Image &image, std::vector *parameters) { + if (parameters->empty()) { + DefaultSqueezeParameters(parameters, image); + } + + for (size_t i = 0; i < parameters->size(); i++) { + JXL_RETURN_IF_ERROR( + CheckMetaSqueezeParams((*parameters)[i], image.channel.size())); + bool horizontal = (*parameters)[i].horizontal; + bool in_place = (*parameters)[i].in_place; + uint32_t beginc = (*parameters)[i].begin_c; + uint32_t endc = (*parameters)[i].begin_c + (*parameters)[i].num_c - 1; + + uint32_t offset; + if (beginc < image.nb_meta_channels) { + if (endc >= image.nb_meta_channels) { + return JXL_FAILURE("Invalid squeeze: mix of meta and nonmeta channels"); + } + if (!in_place) { + return JXL_FAILURE( + "Invalid squeeze: meta channels require in-place residuals"); + } + image.nb_meta_channels += (*parameters)[i].num_c; + } + if (in_place) { + offset = endc + 1; + } else { + offset = image.channel.size(); + } + for (uint32_t c = beginc; c <= endc; c++) { + if (image.channel[c].hshift > 30 || image.channel[c].vshift > 30) { + return JXL_FAILURE("Too many squeezes: shift > 30"); + } + size_t w = image.channel[c].w; + size_t h = image.channel[c].h; + if (w == 0 || h == 0) return JXL_FAILURE("Squeezing empty channel"); + if (horizontal) { + image.channel[c].w = (w + 1) / 2; + if (image.channel[c].hshift >= 0) image.channel[c].hshift++; + w = w - (w + 1) / 2; + } else { + image.channel[c].h = (h + 1) / 2; + if (image.channel[c].vshift >= 0) image.channel[c].vshift++; + h = h - (h + 1) / 2; + } + image.channel[c].shrink(); + Channel dummy(w, h); + dummy.hshift = image.channel[c].hshift; + dummy.vshift = image.channel[c].vshift; + + image.channel.insert(image.channel.begin() + offset + (c - beginc), + std::move(dummy)); + JXL_DEBUG_V(8, "MetaSqueeze applied, current image: %s", + image.DebugString().c_str()); + } + } + return true; +} + +} // namespace jxl + +#endif diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h new file mode 100644 index 0000000000..fb18710a6f --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h @@ -0,0 +1,90 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_ + +// Haar-like transform: halves the resolution in one direction +// A B -> (A+B)>>1 in one channel (average) -> same range as +// original channel +// A-B - tendency in a new channel ('residual' needed to make +// the transform reversible) +// -> theoretically range could be 2.5 +// times larger (2 times without the +// 'tendency'), but there should be lots +// of zeroes +// Repeated application (alternating horizontal and vertical squeezes) results +// in downscaling +// +// The default coefficient ordering is low-frequency to high-frequency, as in +// M. Antonini, M. Barlaud, P. Mathieu and I. Daubechies, "Image coding using +// wavelet transform", IEEE Transactions on Image Processing, vol. 1, no. 2, pp. +// 205-220, April 1992, doi: 10.1109/83.136597. + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +#define JXL_MAX_FIRST_PREVIEW_SIZE 8 + +namespace jxl { + +/* + int avg=(A+B)>>1; + int diff=(A-B); + int rA=(diff+(avg<<1)+(diff&1))>>1; + int rB=rA-diff; + +*/ +// |A B|C D|E F| +// p a n p=avg(A,B), a=avg(C,D), n=avg(E,F) +// +// Goal: estimate C-D (avoiding ringing artifacts) +// (ensuring that in smooth areas, a zero residual corresponds to a smooth +// gradient) + +// best estimate for C: (B + 2*a)/3 +// best estimate for D: (n + 3*a)/4 +// best estimate for C-D: 4*B - 3*n - a /12 + +// avoid ringing by 1) only doing this if B <= a <= n or B >= a >= n +// (otherwise, this is not a smooth area and we cannot really estimate C-D) +// 2) making sure that B <= C <= D <= n or B >= C >= D >= n + +inline pixel_type_w SmoothTendency(pixel_type_w B, pixel_type_w a, + pixel_type_w n) { + pixel_type_w diff = 0; + if (B >= a && a >= n) { + diff = (4 * B - 3 * n - a + 6) / 12; + // 2C = a<<1 + diff - diff&1 <= 2B so diff - diff&1 <= 2B - 2a + // 2D = a<<1 - diff - diff&1 >= 2n so diff + diff&1 <= 2a - 2n + if (diff - (diff & 1) > 2 * (B - a)) diff = 2 * (B - a) + 1; + if (diff + (diff & 1) > 2 * (a - n)) diff = 2 * (a - n); + } else if (B <= a && a <= n) { + diff = (4 * B - 3 * n - a - 6) / 12; + // 2C = a<<1 + diff + diff&1 >= 2B so diff + diff&1 >= 2B - 2a + // 2D = a<<1 - diff + diff&1 <= 2n so diff - diff&1 >= 2a - 2n + if (diff + (diff & 1) < 2 * (B - a)) diff = 2 * (B - a) - 1; + if (diff - (diff & 1) < 2 * (a - n)) diff = 2 * (a - n); + } + return diff; +} + +void DefaultSqueezeParameters(std::vector *parameters, + const Image &image); + +Status CheckMetaSqueezeParams(const SqueezeParams ¶meter, int num_channels); + +Status MetaSqueeze(Image &image, std::vector *parameters); + +Status InvSqueeze(Image &input, std::vector parameters, + ThreadPool *pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_ diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc new file mode 100644 index 0000000000..d9f2b435bf --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc @@ -0,0 +1,98 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/transform.h" + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/palette.h" +#include "lib/jxl/modular/transform/rct.h" +#include "lib/jxl/modular/transform/squeeze.h" + +namespace jxl { + +SqueezeParams::SqueezeParams() { Bundle::Init(this); } +Transform::Transform(TransformId id) { + Bundle::Init(this); + this->id = id; +} + +Status Transform::Inverse(Image &input, const weighted::Header &wp_header, + ThreadPool *pool) { + JXL_DEBUG_V(6, "Input channels (%" PRIuS ", %" PRIuS " meta): ", + input.channel.size(), input.nb_meta_channels); + switch (id) { + case TransformId::kRCT: + return InvRCT(input, begin_c, rct_type, pool); + case TransformId::kSqueeze: + return InvSqueeze(input, squeezes, pool); + case TransformId::kPalette: + return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor, + wp_header, pool); + default: + return JXL_FAILURE("Unknown transformation (ID=%u)", + static_cast(id)); + } +} + +Status Transform::MetaApply(Image &input) { + JXL_DEBUG_V(6, "MetaApply input: %s", input.DebugString().c_str()); + switch (id) { + case TransformId::kRCT: + JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type); + return CheckEqualChannels(input, begin_c, begin_c + 2); + case TransformId::kSqueeze: + JXL_DEBUG_V(2, "Transform: kSqueeze:"); +#if JXL_DEBUG_V_LEVEL >= 2 + { + auto squeezes_copy = squeezes; + if (squeezes_copy.empty()) { + DefaultSqueezeParameters(&squeezes_copy, input); + } + for (const auto ¶ms : squeezes_copy) { + JXL_DEBUG_V( + 2, + " squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32 + ", num_c=%" PRIu32, + params.horizontal, params.in_place, params.begin_c, params.num_c); + } + } +#endif + return MetaSqueeze(input, &squeezes); + case TransformId::kPalette: + JXL_DEBUG_V(2, + "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32 + ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32, + begin_c, num_c, nb_colors, nb_deltas); + return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors, + nb_deltas, lossy_palette); + default: + return JXL_FAILURE("Unknown transformation (ID=%u)", + static_cast(id)); + } +} + +Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) { + if (c1 > image.channel.size() || c2 >= image.channel.size() || c2 < c1) { + return JXL_FAILURE("Invalid channel range: %u..%u (there are only %" PRIuS + " channels)", + c1, c2, image.channel.size()); + } + if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) { + return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta"); + } + const auto &ch1 = image.channel[c1]; + for (size_t c = c1 + 1; c <= c2; c++) { + const auto &ch2 = image.channel[c]; + if (ch1.w != ch2.w || ch1.h != ch2.h || ch1.hshift != ch2.hshift || + ch1.vshift != ch2.vshift) { + return false; + } + } + return true; +} + +} // namespace jxl diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h new file mode 100644 index 0000000000..d5d3259f7a --- /dev/null +++ b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h @@ -0,0 +1,148 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_ +#define LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_ + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +enum class TransformId : uint32_t { + // G, R-G, B-G and variants (including YCoCg). + kRCT = 0, + + // Color palette. Parameters are: [begin_c] [end_c] [nb_colors] + kPalette = 1, + + // Squeezing (Haar-style) + kSqueeze = 2, + + // Invalid for now. + kInvalid = 3, +}; + +struct SqueezeParams : public Fields { + JXL_FIELDS_NAME(SqueezeParams) + bool horizontal; + bool in_place; + uint32_t begin_c; + uint32_t num_c; + SqueezeParams(); + Status VisitFields(Visitor *JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &horizontal)); + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &in_place)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(3), BitsOffset(6, 8), + BitsOffset(10, 72), + BitsOffset(13, 1096), 0, &begin_c)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), BitsOffset(4, 4), 2, &num_c)); + return true; + } +}; + +class Transform : public Fields { + public: + TransformId id; + // for Palette and RCT. + uint32_t begin_c; + // for RCT. 42 possible values starting from 0. + uint32_t rct_type; + // Only for Palette and NearLossless. + uint32_t num_c; + // Only for Palette. + uint32_t nb_colors; + uint32_t nb_deltas; + // for Squeeze. Default squeeze if empty. + std::vector squeezes; + // for NearLossless, not serialized. + int max_delta_error; + // Serialized for Palette. + Predictor predictor; + // for Palette, not serialized. + bool ordered_palette = true; + bool lossy_palette = false; + + explicit Transform(TransformId id); + // default constructor for bundles. + Transform() : Transform(TransformId::kInvalid) {} + + Status VisitFields(Visitor *JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val((uint32_t)TransformId::kRCT), Val((uint32_t)TransformId::kPalette), + Val((uint32_t)TransformId::kSqueeze), + Val((uint32_t)TransformId::kInvalid), (uint32_t)TransformId::kRCT, + reinterpret_cast(&id))); + if (id == TransformId::kInvalid) { + return JXL_FAILURE("Invalid transform ID"); + } + if (visitor->Conditional(id == TransformId::kRCT || + id == TransformId::kPalette)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Bits(3), BitsOffset(6, 8), BitsOffset(10, 72), + BitsOffset(13, 1096), 0, &begin_c)); + } + if (visitor->Conditional(id == TransformId::kRCT)) { + // 0-41, default YCoCg. + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(6), Bits(2), BitsOffset(4, 2), + BitsOffset(6, 10), 6, &rct_type)); + if (rct_type >= 42) { + return JXL_FAILURE("Invalid transform RCT type"); + } + } + if (visitor->Conditional(id == TransformId::kPalette)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(3), Val(4), BitsOffset(13, 1), 3, &num_c)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + BitsOffset(8, 0), BitsOffset(10, 256), BitsOffset(12, 1280), + BitsOffset(16, 5376), 256, &nb_colors)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), BitsOffset(8, 1), BitsOffset(10, 257), + BitsOffset(16, 1281), 0, &nb_deltas)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->Bits(4, (uint32_t)Predictor::Zero, + reinterpret_cast(&predictor))); + if (predictor >= Predictor::Best) { + return JXL_FAILURE("Invalid predictor"); + } + } + + if (visitor->Conditional(id == TransformId::kSqueeze)) { + uint32_t num_squeezes = static_cast(squeezes.size()); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), BitsOffset(4, 1), BitsOffset(6, 9), + BitsOffset(8, 41), 0, &num_squeezes)); + if (visitor->IsReading()) squeezes.resize(num_squeezes); + for (size_t i = 0; i < num_squeezes; i++) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&squeezes[i])); + } + } + return true; + } + + JXL_FIELDS_NAME(Transform) + + Status Inverse(Image &input, const weighted::Header &wp_header, + ThreadPool *pool = nullptr); + Status MetaApply(Image &input); +}; + +Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2); + +static inline pixel_type PixelAdd(pixel_type a, pixel_type b) { + return static_cast(static_cast(a) + + static_cast(b)); +} + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_ -- cgit v1.2.3