From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 19:32:43 +0200
Subject: Adding upstream version 1:115.7.0.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 .../lib/jxl/modular/transform/enc_palette.cc       | 606 +++++++++++++++++++++
 .../lib/jxl/modular/transform/enc_palette.h        |  22 +
 .../jpeg-xl/lib/jxl/modular/transform/enc_rct.cc   |  73 +++
 .../jpeg-xl/lib/jxl/modular/transform/enc_rct.h    |  17 +
 .../lib/jxl/modular/transform/enc_squeeze.cc       | 141 +++++
 .../lib/jxl/modular/transform/enc_squeeze.h        |  20 +
 .../lib/jxl/modular/transform/enc_transform.cc     |  46 ++
 .../lib/jxl/modular/transform/enc_transform.h      |  22 +
 .../jpeg-xl/lib/jxl/modular/transform/palette.cc   | 176 ++++++
 .../jpeg-xl/lib/jxl/modular/transform/palette.h    | 129 +++++
 .../jpeg-xl/lib/jxl/modular/transform/rct.cc       | 153 ++++++
 .../jpeg-xl/lib/jxl/modular/transform/rct.h        |  20 +
 .../jpeg-xl/lib/jxl/modular/transform/squeeze.cc   | 478 ++++++++++++++++
 .../jpeg-xl/lib/jxl/modular/transform/squeeze.h    |  90 +++
 .../jpeg-xl/lib/jxl/modular/transform/transform.cc |  98 ++++
 .../jpeg-xl/lib/jxl/modular/transform/transform.h  | 148 +++++
 16 files changed, 2239 insertions(+)
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/rct.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc
 create mode 100644 third_party/jpeg-xl/lib/jxl/modular/transform/transform.h

(limited to 'third_party/jpeg-xl/lib/jxl/modular/transform')
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc
new file mode 100644
index 0000000000..bc31445bc5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc
@@ -0,0 +1,606 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+
+#include <array>
+#include <map>
+#include <set>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr bool kEncodeToHighQualityImplicitPalette = true;
+
+// Inclusive.
+static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1);
+
+// Channel-weighted squared difference plus a weighted-sum difference term.
+float ColorDistance(const std::vector<float> &JXL_RESTRICT a,
+                    const std::vector<pixel_type> &JXL_RESTRICT b) {
+  JXL_ASSERT(a.size() == b.size());
+  float mean3 = 0;  // scaled mean of the first three channel sums
+  if (a.size() >= 3) {
+    mean3 = (a[0] + b[0] + a[1] + b[1] + a[2] + b[2]) * (1.21f / 3.0f);
+  }
+  float weighted_sq = 0, total_a = 0, total_b = 0;
+  for (size_t ch = 0; ch < a.size(); ++ch) {
+    const float diff = static_cast<float>(a[ch]) - static_cast<float>(b[ch]);
+    float w = 2;
+    int total_w = 1;
+    if (ch == 0) {
+      w = 3;
+      total_w = 3;
+    } else if (ch == 1) {
+      w = 5;
+      total_w = 5;
+    }
+    if (ch < 3 && (a[ch] + b[ch] >= mean3)) {
+      const float extra_w[3] = {1.15, 1.15, 1.12};
+      w += extra_w[ch];
+      if (ch == 2 && ((a[2] + b[2]) < 1.22 * mean3)) w -= 0.5;
+    }
+    weighted_sq += diff * diff * w * w;
+    total_a += a[ch] * total_w;
+    total_b += b[ch] * total_w;
+  }
+  weighted_sq *= 4;
+  const float total_diff = total_a - total_b;
+  weighted_sq += total_diff * total_diff;
+  return weighted_sq;
+}
+
+static int QuantizeColorToImplicitPaletteIndex(
+    const std::vector<pixel_type> &color, const int palette_size,
+    const int bit_depth, bool high_quality) {
+  // Quantizes each component of `color` to a cube coordinate and combines
+  // them, first component least significant, into one index that is then
+  // offset by `palette_size` (plus kLargeCubeOffset for the large cube).
+  int cube_index = 0;
+  int stride = 1;
+  if (!high_quality) {
+    // Small cube: bias downwards, clamp at 0, cap at kSmallCube - 1.
+    for (const pixel_type component : color) {
+      int shifted = component - (1 << (std::max(0, bit_depth - 3)));
+      shifted = std::max(0, shifted);
+      int level = ((kLargeCube - 1) * shifted + (1 << (bit_depth - 1))) /
+                  ((1 << bit_depth) - 1);
+      JXL_ASSERT((level % kLargeCube) == level);
+      if (level >= kSmallCube) level = kSmallCube - 1;
+      cube_index += level * stride;
+      stride *= kSmallCube;
+    }
+    return cube_index + palette_size;
+  }
+  // Large cube: plain rounding to kLargeCube levels per component.
+  for (const pixel_type component : color) {
+    const int level = ((kLargeCube - 1) * component + (1 << (bit_depth - 1))) /
+                      ((1 << bit_depth) - 1);
+    JXL_ASSERT((level % kLargeCube) == level);
+    cube_index += level * stride;
+    stride *= kLargeCube;
+  }
+  return cube_index + palette_size + kLargeCubeOffset;
+}
+
+}  // namespace palette_internal
+
+int RoundInt(int value, int div) {  // rounds symmetrically around 0
+  const int magnitude = value < 0 ? -value : value;
+  return (value < 0 ? -1 : 1) * ((magnitude + div / 2) / div);
+}
+
+struct PaletteIterationData {
+  static constexpr int kMaxDeltas = 128;
+  bool final_run = false;
+  // Per-channel residuals and their distances, appended by non-final runs.
+  std::vector<pixel_type> deltas[3];
+  std::vector<double> delta_distances;
+  // Result of FindFrequentColorDeltas(), one vector per channel.
+  std::vector<pixel_type> frequent_deltas[3];
+
+  // Fills `frequent_deltas` with the highest-scoring bucketed entries of
+  // `deltas`; scores combine occurrence, delta distance and magnitude.
+  void FindFrequentColorDeltas(int num_pixels, int bitdepth) {
+    using Delta3 = std::array<pixel_type, 3>;
+    using ScoredDelta = std::pair<Delta3, double>;
+    const pixel_type bucket = 3 << std::max(0, bitdepth - 8);
+
+    // Accumulate, per bucket, the fourth root of each sample's distance.
+    std::map<Delta3, double> score_by_delta;
+    const size_t num_samples = deltas[0].size();
+    for (size_t s = 0; s < num_samples; ++s) {
+      const Delta3 key = {{RoundInt(deltas[0][s], bucket),
+                           RoundInt(deltas[1][s], bucket),
+                           RoundInt(deltas[2][s], bucket)}};
+      const bool is_zero = key[0] == 0 && key[1] == 0 && key[2] == 0;
+      if (!is_zero) score_by_delta[key] += sqrt(sqrt(delta_distances[s]));
+    }
+
+    // Scale every score by the delta's own magnitude and by 1 / num_pixels.
+    const float per_pixel = 1.0f / num_pixels;
+    for (auto &entry : score_by_delta) {
+      const Delta3 &d = entry.first;
+      std::vector<pixel_type> as_vector = {d[0], d[1], d[2]};
+      float magnitude =
+          sqrt(palette_internal::ColorDistance({0, 0, 0}, as_vector)) + 1;
+      entry.second *= magnitude * per_pixel;
+    }
+
+    // Rank by descending score.
+    std::vector<ScoredDelta> ranked(score_by_delta.begin(),
+                                    score_by_delta.end());
+    std::sort(ranked.begin(), ranked.end(),
+              [](const ScoredDelta &lhs, const ScoredDelta &rhs) {
+                return lhs.second > rhs.second;
+              });
+
+    // Keep at most kMaxDeltas entries scoring at least 17 (threshold tuned
+    // on the jyrki31 corpus), scaled back from bucket units.
+    for (const ScoredDelta &item : ranked) {
+      if (frequent_deltas[0].size() >= kMaxDeltas) break;
+      if (item.second < 17) break;
+      for (int c = 0; c < 3; ++c) {
+        frequent_deltas[c].push_back(item.first[c] * bucket);
+      }
+    }
+  }
+};
+
+// One pass of the forward palette transform over channels [begin_c, end_c].
+// On the final run, pixels become palette indices and the palette is inserted.
+Status FwdPaletteIteration(Image &input, uint32_t begin_c, uint32_t end_c,
+                           uint32_t &nb_colors, uint32_t &nb_deltas,
+                           bool ordered, bool lossy, Predictor &predictor,
+                           const weighted::Header &wp_header,
+                           PaletteIterationData &palette_iteration_data) {
+  JXL_QUIET_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+  JXL_ASSERT(begin_c >= input.nb_meta_channels);
+  uint32_t nb = end_c - begin_c + 1;  // number of channels to palettize
+
+  size_t w = input.channel[begin_c].w;
+  size_t h = input.channel[begin_c].h;
+
+  if (!lossy && nb == 1) {
+    // Channel palette special case: a single channel, lossless.
+    if (nb_colors == 0) return false;
+    pixel_type minval, maxval;
+    compute_minmax(input.channel[begin_c], &minval, &maxval);
+    size_t lookup_table_size =
+        static_cast<int64_t>(maxval) - static_cast<int64_t>(minval) + 1;
+    if (lookup_table_size > palette_internal::kMaxPaletteLookupTableSize) {
+      // a lookup table would use too much memory, instead use a slower approach
+      // with std::set
+      std::set<pixel_type> chpalette;
+      pixel_type idx = 0;
+      for (size_t y = 0; y < h; y++) {
+        const pixel_type *p = input.channel[begin_c].Row(y);
+        for (size_t x = 0; x < w; x++) {
+          const bool new_color = chpalette.insert(p[x]).second;
+          if (new_color) {
+            idx++;
+            if (idx > (int)nb_colors) return false;  // too many colors
+          }
+        }
+      }
+      JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+      Channel pch(idx, 1);
+      pch.hshift = -1;
+      pch.vshift = -1;
+      nb_colors = idx;
+      idx = 0;
+      pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+      for (pixel_type p : chpalette) {  // std::set iterates in ascending order
+        p_palette[idx++] = p;
+      }
+      for (size_t y = 0; y < h; y++) {
+        pixel_type *p = input.channel[begin_c].Row(y);
+        for (size_t x = 0; x < w; x++) {
+          for (idx = 0; idx < (int)nb_colors && p[x] != p_palette[idx]; idx++) {
+          }  // the bound is tested before p_palette[idx] is read
+          JXL_DASSERT(idx < (int)nb_colors);
+          p[x] = idx;  // replace the value by its palette index
+        }
+      }
+      predictor = Predictor::Zero;
+      input.nb_meta_channels++;  // the palette becomes a meta channel
+      input.channel.insert(input.channel.begin(), std::move(pch));
+
+      return true;
+    }
+    std::vector<pixel_type> lookup(lookup_table_size, 0);
+    pixel_type idx = 0;
+    for (size_t y = 0; y < h; y++) {
+      const pixel_type *p = input.channel[begin_c].Row(y);
+      for (size_t x = 0; x < w; x++) {
+        if (lookup[p[x] - minval] == 0) {  // value not seen before
+          lookup[p[x] - minval] = 1;
+          idx++;
+          if (idx > (int)nb_colors) return false;  // too many colors
+        }
+      }
+    }
+    JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+    Channel pch(idx, 1);
+    pch.hshift = -1;
+    pch.vshift = -1;
+    nb_colors = idx;
+    idx = 0;
+    pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+    for (size_t i = 0; i < lookup_table_size; i++) {
+      if (lookup[i]) {
+        p_palette[idx] = i + minval;
+        lookup[i] = idx;  // value -> palette index
+        idx++;
+      }
+    }
+    for (size_t y = 0; y < h; y++) {
+      pixel_type *p = input.channel[begin_c].Row(y);
+      for (size_t x = 0; x < w; x++) p[x] = lookup[p[x] - minval];
+    }
+    predictor = Predictor::Zero;
+    input.nb_meta_channels++;  // the palette becomes a meta channel
+    input.channel.insert(input.channel.begin(), std::move(pch));
+    return true;
+  }
+
+  Image quantized_input;
+  if (lossy) {  // copy that later receives the quantized pixel values
+    quantized_input = Image(w, h, input.bitdepth, nb);
+    for (size_t c = 0; c < nb; c++) {
+      CopyImageTo(input.channel[begin_c + c].plane,
+                  &quantized_input.channel[c].plane);
+    }
+  }
+
+  JXL_DEBUG_V(
+      7, "Trying to represent channels %i-%i using at most a %i-color palette.",
+      begin_c, end_c, nb_colors);
+  nb_deltas = 0;
+  bool delta_used = false;
+  std::set<std::vector<pixel_type>> candidate_palette;  // lexicographic order
+  std::vector<std::vector<pixel_type>> candidate_palette_imageorder;
+  std::vector<pixel_type> color(nb);
+  std::vector<float> color_with_error(nb);
+  std::vector<const pixel_type *> p_in(nb);
+
+  if (lossy) {
+    palette_iteration_data.FindFrequentColorDeltas(w * h, input.bitdepth);
+    nb_deltas = palette_iteration_data.frequent_deltas[0].size();
+
+    // Count color frequency for colors that make a cross.
+    std::map<std::vector<pixel_type>, size_t> color_freq_map;
+    for (size_t y = 1; y + 1 < h; y++) {
+      for (uint32_t c = 0; c < nb; c++) {
+        p_in[c] = input.channel[begin_c + c].Row(y);
+      }
+      for (size_t x = 1; x + 1 < w; x++) {
+        for (uint32_t c = 0; c < nb; c++) {
+          color[c] = p_in[c][x];
+        }
+        int offsets[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}};
+        bool makes_cross = true;
+        for (int i = 0; i < 4 && makes_cross; ++i) {
+          int dx = offsets[i][0];
+          int dy = offsets[i][1];
+          for (uint32_t c = 0; c < nb && makes_cross; c++) {
+            if (input.channel[begin_c + c].Row(y + dy)[x + dx] != color[c]) {
+              makes_cross = false;
+            }
+          }
+        }
+        if (makes_cross) color_freq_map[color] += 1;
+      }
+    }
+    // Add colors satisfying frequency condition to the palette.
+    constexpr float kImageFraction = 0.01f;
+    size_t color_frequency_lower_bound = 5 + input.h * input.w * kImageFraction;
+    for (const auto &color_freq : color_freq_map) {
+      if (color_freq.second > color_frequency_lower_bound) {
+        candidate_palette.insert(color_freq.first);
+        candidate_palette_imageorder.push_back(color_freq.first);
+      }
+    }
+  }
+
+  for (size_t y = 0; y < h; y++) {
+    for (uint32_t c = 0; c < nb; c++) {
+      p_in[c] = input.channel[begin_c + c].Row(y);
+    }
+    for (size_t x = 0; x < w; x++) {
+      if (lossy && candidate_palette.size() >= nb_colors) break;
+      for (uint32_t c = 0; c < nb; c++) {
+        color[c] = p_in[c][x];
+      }
+      const bool new_color = candidate_palette.insert(color).second;
+      if (new_color) {
+        candidate_palette_imageorder.push_back(color);
+      }
+      if (candidate_palette.size() > nb_colors) {
+        return false;  // too many colors
+      }
+    }
+  }
+
+  nb_colors = nb_deltas + candidate_palette.size();  // deltas come first
+  JXL_DEBUG_V(6, "Channels %i-%i can be represented using a %i-color palette.",
+              begin_c, end_c, nb_colors);
+
+  Channel pch(nb_colors, nb);
+  pch.hshift = -1;
+  pch.vshift = -1;
+  pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+  intptr_t onerow = pch.plane.PixelsPerRow();
+  intptr_t onerow_image = input.channel[begin_c].plane.PixelsPerRow();
+  const int bit_depth = std::min(input.bitdepth, 24);
+
+  if (lossy) {
+    for (uint32_t i = 0; i < nb_deltas; i++) {
+      for (size_t c = 0; c < 3; c++) {
+        p_palette[c * onerow + i] =
+            palette_iteration_data.frequent_deltas[c][i];
+      }
+    }
+  }
+
+  int x = 0;
+  if (ordered) {
+    JXL_DEBUG_V(7, "Palette of %i colors, using lexicographic order",
+                nb_colors);
+    for (auto pcol : candidate_palette) {
+      JXL_DEBUG_V(9, "  Color %i :  ", x);
+      for (size_t i = 0; i < nb; i++) {
+        p_palette[nb_deltas + i * onerow + x] = pcol[i];
+      }
+      for (size_t i = 0; i < nb; i++) {
+        JXL_DEBUG_V(9, "%i ", pcol[i]);
+      }
+      x++;
+    }
+  } else {
+    JXL_DEBUG_V(7, "Palette of %i colors, using image order", nb_colors);
+    for (auto pcol : candidate_palette_imageorder) {
+      JXL_DEBUG_V(9, "  Color %i :  ", x);
+      for (size_t i = 0; i < nb; i++)
+        p_palette[nb_deltas + i * onerow + x] = pcol[i];
+      for (size_t i = 0; i < nb; i++) JXL_DEBUG_V(9, "%i ", pcol[i]);
+      x++;
+    }
+  }
+  std::vector<weighted::State> wp_states;
+  for (size_t c = 0; c < nb; c++) {
+    wp_states.emplace_back(wp_header, w, h);
+  }
+  std::vector<pixel_type *> p_quant(nb);
+  // Three rows of error for dithering: y to y + 2.
+  // Each row has two pixels of padding in the ends, which is
+  // beneficial for both precision and encoding speed.
+  std::vector<std::vector<float>> error_row[3];
+  if (lossy) {
+    for (int i = 0; i < 3; ++i) {
+      error_row[i].resize(nb);
+      for (size_t c = 0; c < nb; ++c) {
+        error_row[i][c].resize(w + 4);
+      }
+    }
+  }
+  for (size_t y = 0; y < h; y++) {
+    for (size_t c = 0; c < nb; c++) {
+      p_in[c] = input.channel[begin_c + c].Row(y);
+      if (lossy) p_quant[c] = quantized_input.channel[c].Row(y);
+    }
+    pixel_type *JXL_RESTRICT p = input.channel[begin_c].Row(y);
+    for (size_t x = 0; x < w; x++) {
+      int index;
+      if (!lossy) {
+        for (size_t c = 0; c < nb; c++) color[c] = p_in[c][x];
+        // Exact search.
+        for (index = 0; static_cast<uint32_t>(index) < nb_colors; index++) {
+          bool found = true;
+          for (size_t c = 0; c < nb; c++) {
+            if (color[c] != p_palette[c * onerow + index]) {
+              found = false;
+              break;
+            }
+          }
+          if (found) break;
+        }
+        if (index < static_cast<int>(nb_deltas)) {
+          delta_used = true;
+        }
+      } else {
+        int best_index = 0;
+        bool best_is_delta = false;
+        float best_distance = std::numeric_limits<float>::infinity();
+        std::vector<pixel_type> best_val(nb, 0);
+        std::vector<pixel_type> ideal_residual(nb, 0);
+        std::vector<pixel_type> quantized_val(nb);
+        std::vector<pixel_type> predictions(nb);
+        static const double kDiffusionMultiplier[] = {0.55, 0.75};
+        for (int diffusion_index = 0; diffusion_index < 2; ++diffusion_index) {
+          for (size_t c = 0; c < nb; c++) {
+            color_with_error[c] =
+                p_in[c][x] + palette_iteration_data.final_run *
+                                 kDiffusionMultiplier[diffusion_index] *
+                                 error_row[0][c][x + 2];
+            color[c] = Clamp1(lroundf(color_with_error[c]), 0l,
+                              (1l << input.bitdepth) - 1);
+          }
+
+          for (size_t c = 0; c < nb; ++c) {
+            predictions[c] = PredictNoTreeWP(w, p_quant[c] + x, onerow_image, x,
+                                             y, predictor, &wp_states[c])
+                                 .guess;
+          }
+          const auto TryIndex = [&](const int index) {  // score one candidate
+            for (size_t c = 0; c < nb; c++) {
+              quantized_val[c] = palette_internal::GetPaletteValue(
+                  p_palette, index, /*c=*/c,
+                  /*palette_size=*/nb_colors,
+                  /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+              if (index < static_cast<int>(nb_deltas)) {
+                quantized_val[c] += predictions[c];
+              }
+            }
+            const float color_distance =
+                32.0 / (1LL << std::max(0, 2 * (bit_depth - 8))) *
+                palette_internal::ColorDistance(color_with_error,
+                                                quantized_val);
+            float index_penalty = 0;
+            if (index == -1) {
+              index_penalty = -124;
+            } else if (index < 0) {
+              index_penalty = -2 * index;
+            } else if (index < static_cast<int>(nb_deltas)) {
+              index_penalty = 250;
+            } else if (index < static_cast<int>(nb_colors)) {
+              index_penalty = 150;
+            } else if (index < static_cast<int>(nb_colors) +
+                                   palette_internal::kLargeCubeOffset) {
+              index_penalty = 70;
+            } else {
+              index_penalty = 256;
+            }
+            const float distance = color_distance + index_penalty;
+            if (distance < best_distance) {
+              best_distance = distance;
+              best_index = index;
+              best_is_delta = index < static_cast<int>(nb_deltas);
+              best_val.swap(quantized_val);
+              for (size_t c = 0; c < nb; ++c) {
+                ideal_residual[c] = color_with_error[c] - predictions[c];
+              }
+            }
+          };
+          for (index = palette_internal::kMinImplicitPaletteIndex;
+               index < static_cast<int32_t>(nb_colors); index++) {
+            TryIndex(index);
+          }
+          TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+              color, nb_colors, bit_depth,
+              /*high_quality=*/false));
+          if (palette_internal::kEncodeToHighQualityImplicitPalette) {
+            TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+                color, nb_colors, bit_depth,
+                /*high_quality=*/true));
+          }
+        }
+        index = best_index;
+        delta_used |= best_is_delta;
+        if (!palette_iteration_data.final_run) {  // collect statistics
+          for (size_t c = 0; c < 3; ++c) {
+            palette_iteration_data.deltas[c].push_back(ideal_residual[c]);
+          }
+          palette_iteration_data.delta_distances.push_back(best_distance);
+        }
+
+        for (size_t c = 0; c < nb; ++c) {
+          wp_states[c].UpdateErrors(best_val[c], x, y, w);
+          p_quant[c][x] = best_val[c];
+        }
+        float len_error = 0;
+        for (size_t c = 0; c < nb; ++c) {
+          float local_error = color_with_error[c] - best_val[c];
+          len_error += local_error * local_error;
+        }
+        len_error = sqrt(len_error);
+        float modulate = 1.0;
+        int len_limit = 38 << std::max(0, bit_depth - 8);
+        if (len_error > len_limit) {
+          modulate *= len_limit / len_error;
+        }
+        for (size_t c = 0; c < nb; ++c) {
+          float total_error = (color_with_error[c] - best_val[c]);
+
+          // If the neighboring pixels have some error in the opposite
+          // direction of total_error, cancel some or all of it out before
+          // spreading among them.
+          constexpr int offsets[12][2] = {{1, 2}, {0, 3}, {0, 4}, {1, 1},
+                                          {1, 3}, {2, 2}, {1, 0}, {1, 4},
+                                          {2, 1}, {2, 3}, {2, 0}, {2, 4}};
+          float total_available = 0;
+          for (int i = 0; i < 11; ++i) {
+            const int row = offsets[i][0];
+            const int col = offsets[i][1];
+            if (std::signbit(error_row[row][c][x + col]) !=
+                std::signbit(total_error)) {
+              total_available += error_row[row][c][x + col];
+            }
+          }
+          float weight =
+              std::abs(total_error) / (std::abs(total_available) + 1e-3);
+          weight = std::min(weight, 1.0f);
+          for (int i = 0; i < 11; ++i) {
+            const int row = offsets[i][0];
+            const int col = offsets[i][1];
+            if (std::signbit(error_row[row][c][x + col]) !=
+                std::signbit(total_error)) {
+              total_error += weight * error_row[row][c][x + col];
+              error_row[row][c][x + col] *= (1 - weight);
+            }
+          }
+          total_error *= modulate;
+          const float remaining_error = (1.0f / 14.) * total_error;
+          error_row[0][c][x + 3] += 2 * remaining_error;
+          error_row[0][c][x + 4] += remaining_error;
+          error_row[1][c][x + 0] += remaining_error;
+          for (int i = 0; i < 5; ++i) {
+            error_row[1][c][x + i] += remaining_error;
+            error_row[2][c][x + i] += remaining_error;
+          }
+        }
+      }
+      if (palette_iteration_data.final_run) p[x] = index;  // final run only
+    }
+    if (lossy) {  // advance the dithering error rows by one line
+      for (size_t c = 0; c < nb; ++c) {
+        error_row[0][c].swap(error_row[1][c]);
+        error_row[1][c].swap(error_row[2][c]);
+        std::fill(error_row[2][c].begin(), error_row[2][c].end(), 0.f);
+      }
+    }
+  }
+  if (!delta_used) {  // no delta palette entry was selected
+    predictor = Predictor::Zero;
+  }
+  if (palette_iteration_data.final_run) {
+    input.nb_meta_channels++;
+    input.channel.erase(input.channel.begin() + begin_c + 1,
+                        input.channel.begin() + end_c + 1);
+    input.channel.insert(input.channel.begin(), std::move(pch));
+  }
+  nb_colors -= nb_deltas;  // report the count without delta entries
+  return true;
+}
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                  uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered,
+                  bool lossy, Predictor &predictor,
+                  const weighted::Header &wp_header) {
+  // Lossy input with bitdepth >= 8 gets a non-final pass first; it is given
+  // copies of the two counts so the final pass starts from the originals.
+  PaletteIterationData iteration;
+  if (lossy && input.bitdepth >= 8) {
+    uint32_t scratch_colors = nb_colors;
+    uint32_t scratch_deltas = nb_deltas;
+    JXL_RETURN_IF_ERROR(FwdPaletteIteration(
+        input, begin_c, end_c, scratch_colors, scratch_deltas, ordered, lossy,
+        predictor, wp_header, iteration));
+  }
+  iteration.final_run = true;
+  return FwdPaletteIteration(input, begin_c, end_c, nb_colors, nb_deltas,
+                             ordered, lossy, predictor, wp_header, iteration);
+}
+
+}  // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h
new file mode 100644
index 0000000000..0f3d66825b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                  uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered,
+                  bool lossy, Predictor &predictor,
+                  const weighted::Header &wp_header);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc
new file mode 100644
index 0000000000..050563a3c2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_rct.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+Status FwdRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+  if (rct_type == 0) return false;  // noop
+  // rct_type encodes 7 * permutation + kind.
+  // permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+  // kind 6 is YCoCg. For kinds 0-5 the low bit tells whether First is
+  // subtracted from Third and the high bits pick what is subtracted from
+  // Second: 0=nothing, 1=First, 2=average of First and Third.
+  const int permutation = rct_type / 7;
+  const int kind = rct_type % 7;
+  const int second_mode = kind >> 1;
+  const bool subtract_from_third = (kind & 1) != 0;
+  const size_t width = input.channel[begin_c].w;
+  const size_t height = input.channel[begin_c].h;
+  // Permuted source channels; outputs are always written in channel order.
+  const size_t src0 = begin_c + (permutation % 3);
+  const size_t src1 = begin_c + ((permutation + 1 + permutation / 3) % 3);
+  const size_t src2 = begin_c + ((permutation + 2 - permutation / 3) % 3);
+  const auto rct_row = [&](const int y, const int thread) {
+    const pixel_type* row_first = input.channel[src0].Row(y);
+    const pixel_type* row_second = input.channel[src1].Row(y);
+    const pixel_type* row_third = input.channel[src2].Row(y);
+    pixel_type* dst0 = input.channel[begin_c].Row(y);
+    pixel_type* dst1 = input.channel[begin_c + 1].Row(y);
+    pixel_type* dst2 = input.channel[begin_c + 2].Row(y);
+    if (kind == 6) {
+      for (size_t x = 0; x < width; x++) {
+        const pixel_type r = row_first[x];
+        const pixel_type g = row_second[x];
+        const pixel_type b = row_third[x];
+        const pixel_type co = r - b;
+        const pixel_type t = b + (co >> 1);
+        const pixel_type cg = g - t;
+        dst1[x] = co;
+        dst2[x] = cg;
+        dst0[x] = t + (cg >> 1);
+      }
+      return;
+    }
+    for (size_t x = 0; x < width; x++) {
+      const pixel_type first = row_first[x];
+      pixel_type second = row_second[x];
+      pixel_type third = row_third[x];
+      // Second must see the original Third, so it is updated before Third.
+      if (second_mode == 1) {
+        second -= first;
+      } else if (second_mode == 2) {
+        second -= (first + third) >> 1;
+      }
+      if (subtract_from_third) third -= first;
+      dst0[x] = first;
+      dst1[x] = second;
+      dst2[x] = third;
+    }
+  };
+  return RunOnPool(pool, 0, height, ThreadPool::NoInit, rct_row, "FwdRCT");
+}
+
+}  // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h
new file mode 100644
index 0000000000..cb5a193c8d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdRCT(Image &input, size_t begin_c, size_t rct_type, ThreadPool *pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc
new file mode 100644
index 0000000000..dfd90cde68
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+void FwdHSqueeze(Image &input, int c, int rc) {
+  const Channel &chin = input.channel[c];
+  // Replaces channel c by pair averages; pair residuals are inserted at rc.
+  JXL_DEBUG_V(4, "Doing horizontal squeeze of channel %i to new channel %i", c,
+              rc);
+  // Averages get ceil(w / 2) columns, residuals the remaining floor(w / 2).
+  Channel chout((chin.w + 1) / 2, chin.h, chin.hshift + 1, chin.vshift);
+  Channel chout_residual(chin.w - chout.w, chout.h, chin.hshift + 1,
+                         chin.vshift);
+
+  for (size_t y = 0; y < chout.h; y++) {
+    const pixel_type *JXL_RESTRICT p_in = chin.Row(y);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+    for (size_t x = 0; x < chout_residual.w; x++) {
+      pixel_type A = p_in[x * 2];
+      pixel_type B = p_in[x * 2 + 1];
+      pixel_type avg = (A + B + (A > B)) >> 1;  // +1 before halving if A > B
+      p_out[x] = avg;
+
+      pixel_type diff = A - B;
+
+      pixel_type next_avg = avg;  // kept when there is no next column at all
+      if (x + 1 < chout_residual.w) {
+        next_avg = (p_in[x * 2 + 2] + p_in[x * 2 + 3] +
+                    (p_in[x * 2 + 2] > p_in[x * 2 + 3])) >>
+                   1;  // which will be chout.value(y,x+1)
+      } else if (chin.w & 1)  // odd width: next is the unpaired last column
+        next_avg = p_in[x * 2 + 2];
+      pixel_type left = (x > 0 ? p_in[x * 2 - 1] : avg);  // B of previous pair
+      pixel_type tendency = SmoothTendency(left, avg, next_avg);
+
+      p_res[x] = diff - tendency;  // residual = difference minus tendency
+    }
+    if (chin.w & 1) {  // odd width: copy the unpaired last column unchanged
+      int x = chout.w - 1;
+      p_out[x] = p_in[x * 2];
+    }
+  }
+  input.channel[c] = std::move(chout);  // averages take over slot c
+  input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+void FwdVSqueeze(Image &input, int c, int rc) {
+  const Channel &chin = input.channel[c];
+  // Replaces channel c by row-pair averages; residuals are inserted at rc.
+  JXL_DEBUG_V(4, "Doing vertical squeeze of channel %i to new channel %i", c,
+              rc);
+  // Averages get ceil(h / 2) rows, residuals the remaining floor(h / 2).
+  Channel chout(chin.w, (chin.h + 1) / 2, chin.hshift, chin.vshift + 1);
+  Channel chout_residual(chin.w, chin.h - chout.h, chin.hshift,
+                         chin.vshift + 1);
+  intptr_t onerow_in = chin.plane.PixelsPerRow();  // offset between input rows
+  for (size_t y = 0; y < chout_residual.h; y++) {
+    const pixel_type *JXL_RESTRICT p_in = chin.Row(y * 2);  // upper row of pair
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+    for (size_t x = 0; x < chout.w; x++) {
+      pixel_type A = p_in[x];
+      pixel_type B = p_in[x + onerow_in];  // same column, one row below
+      pixel_type avg = (A + B + (A > B)) >> 1;  // +1 before halving if A > B
+      p_out[x] = avg;
+
+      pixel_type diff = A - B;
+
+      pixel_type next_avg = avg;  // kept when there is no row below the pair
+      if (y + 1 < chout_residual.h) {
+        next_avg = (p_in[x + 2 * onerow_in] + p_in[x + 3 * onerow_in] +
+                    (p_in[x + 2 * onerow_in] > p_in[x + 3 * onerow_in])) >>
+                   1;  // which will be chout.value(y+1,x)
+      } else if (chin.h & 1) {  // odd height: next is the unpaired last row
+        next_avg = p_in[x + 2 * onerow_in];
+      }
+      pixel_type top =  // sample one row above the pair, or avg on first pair
+          (y > 0 ? p_in[static_cast<ssize_t>(x) - onerow_in] : avg);
+      pixel_type tendency = SmoothTendency(top, avg, next_avg);
+
+      p_res[x] = diff - tendency;  // residual = difference minus tendency
+    }
+  }
+  if (chin.h & 1) {  // odd height: copy the unpaired last row unchanged
+    size_t y = chout.h - 1;
+    const pixel_type *p_in = chin.Row(y * 2);
+    pixel_type *p_out = chout.Row(y);
+    for (size_t x = 0; x < chout.w; x++) {
+      p_out[x] = p_in[x];
+    }
+  }
+  input.channel[c] = std::move(chout);  // averages take over slot c
+  input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool) {  // NOTE: pool is not used in this body
+  if (parameters.empty()) {  // no explicit steps given: request the defaults
+    DefaultSqueezeParameters(&parameters, input);
+  }
+  // If there is still nothing to do, skip the squeeze and return false.
+  if (parameters.empty()) return false;
+  for (size_t i = 0; i < parameters.size(); i++) {  // steps run in list order
+    JXL_RETURN_IF_ERROR(
+        CheckMetaSqueezeParams(parameters[i], input.channel.size()));
+    bool horizontal = parameters[i].horizontal;
+    bool in_place = parameters[i].in_place;
+    uint32_t beginc = parameters[i].begin_c;
+    uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;  // incl.
+    uint32_t offset;  // channel index that receives the first residual
+    if (in_place) {
+      offset = endc + 1;  // directly after the squeezed range
+    } else {
+      offset = input.channel.size();  // after all channels present right now
+    }
+    for (uint32_t c = beginc; c <= endc; c++) {
+      if (horizontal) {
+        FwdHSqueeze(input, c, offset + c - beginc);
+      } else {
+        FwdVSqueeze(input, c, offset + c - beginc);
+      }
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h
new file mode 100644
index 0000000000..39b001017b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc
new file mode 100644
index 0000000000..bdaaf9f87e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_transform.h"
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+#include "lib/jxl/modular/transform/enc_rct.h"
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,
+                        const weighted::Header &wp_header, ThreadPool *pool) {
+  switch (t.id) {  // one forward routine per transform kind
+    case TransformId::kRCT:
+      return FwdRCT(input, t.begin_c, t.rct_type, pool);
+    case TransformId::kSqueeze:  // FwdSqueeze takes the parameter list by value
+      return FwdSqueeze(input, t.squeezes, pool);
+    case TransformId::kPalette:  // only case using wp_header; pool is not passed
+      return FwdPalette(input, t.begin_c, t.begin_c + t.num_c - 1, t.nb_colors,
+                        t.nb_deltas, t.ordered_palette, t.lossy_palette,
+                        t.predictor, wp_header);
+    default:  // any other id is reported as a failure
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(t.id));
+  }
+}
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max) {
+  pixel_type realmin = std::numeric_limits<pixel_type>::max();  // see note below
+  pixel_type realmax = std::numeric_limits<pixel_type>::min();
+  for (size_t y = 0; y < ch.h; y++) {  // scan every pixel of the channel
+    const pixel_type *JXL_RESTRICT p = ch.Row(y);
+    for (size_t x = 0; x < ch.w; x++) {
+      if (p[x] < realmin) realmin = p[x];
+      if (p[x] > realmax) realmax = p[x];
+    }
+  }
+  // Null outputs are skipped; with no pixels the initial extremes are returned.
+  if (min) *min = realmin;
+  if (max) *max = realmax;
+}
+
+}  // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h
new file mode 100644
index 0000000000..07659e1b0a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,
+                        const weighted::Header &wp_header, ThreadPool *pool);
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc
new file mode 100644
index 0000000000..46129f19f0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc
@@ -0,0 +1,176 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,
+                  uint32_t nb_deltas, Predictor predictor,
+                  const weighted::Header &wp_header, ThreadPool *pool) {
+  if (input.nb_meta_channels < 1) {
+    return JXL_FAILURE("Error: Palette transform without palette.");
+  }
+  std::atomic<int> num_errors{0};  // NOTE(review): never incremented below
+  int nb = input.channel[0].h;  // palette rows = number of output channels
+  uint32_t c0 = begin_c + 1;  // index channel; the palette sits at slot 0
+  if (c0 >= input.channel.size()) {
+    return JXL_FAILURE("Channel is out of range.");
+  }
+  size_t w = input.channel[c0].w;
+  size_t h = input.channel[c0].h;
+  if (nb < 1) return JXL_FAILURE("Corrupted transforms");
+  for (int i = 1; i < nb; i++) {  // add nb - 1 channels shaped like c0
+    input.channel.insert(
+        input.channel.begin() + c0 + 1,
+        Channel(w, h, input.channel[c0].hshift, input.channel[c0].vshift));
+  }
+  const Channel &palette = input.channel[0];
+  const pixel_type *JXL_RESTRICT p_palette = input.channel[0].Row(0);
+  intptr_t onerow = input.channel[0].plane.PixelsPerRow();
+  intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow();
+  const int bit_depth = std::min(input.bitdepth, 24);
+  // Three cases: empty image, plain lookup, or delta entries / a predictor.
+  if (w == 0) {
+    // Nothing to do.
+    // Avoid touching "empty" channels with non-zero height.
+  } else if (nb_deltas == 0 && predictor == Predictor::Zero) {
+    if (nb == 1) {  // single channel: replace each index in place
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, h, ThreadPool::NoInit,
+          [&](const uint32_t task, size_t /* thread */) {
+            const size_t y = task;  // one task per row
+            pixel_type *p = input.channel[c0].Row(y);
+            for (size_t x = 0; x < w; x++) {
+              const int index = Clamp1<int>(p[x], 0, (pixel_type)palette.w - 1);
+              p[x] = palette_internal::GetPaletteValue(
+                  p_palette, index, /*c=*/0,
+                  /*palette_size=*/palette.w,
+                  /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+            }
+          },
+          "UndoChannelPalette"));
+    } else {  // several channels: c0 holds the indices and is also output 0
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, h, ThreadPool::NoInit,
+          [&](const uint32_t task, size_t /* thread */) {
+            const size_t y = task;  // one task per row
+            std::vector<pixel_type *> p_out(nb);
+            const pixel_type *p_index = input.channel[c0].Row(y);
+            for (int c = 0; c < nb; c++)
+              p_out[c] = input.channel[c0 + c].Row(y);
+            for (size_t x = 0; x < w; x++) {
+              const int index = p_index[x];  // read first: p_out[0] aliases it
+              for (int c = 0; c < nb; c++) {
+                p_out[c][x] = palette_internal::GetPaletteValue(
+                    p_palette, index, /*c=*/c,
+                    /*palette_size=*/palette.w,
+                    /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+              }
+            }
+          },
+          "UndoPalette"));
+    }
+  } else {
+    // Parallelized per channel; c0 is overwritten, so its indices are copied.
+    ImageI indices = CopyImage(input.channel[c0].plane);
+    if (predictor == Predictor::Weighted) {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, nb, ThreadPool::NoInit,
+          [&](const uint32_t c, size_t /* thread */) {
+            Channel &channel = input.channel[c0 + c];
+            weighted::State wp_state(wp_header, channel.w, channel.h);
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type_w val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w, /*onerow=*/onerow,
+                        /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // delta entry
+                  PredictionResult pred =
+                      PredictNoTreeWP(channel.w, p + x, onerow_image, x, y,
+                                      predictor, &wp_state);
+                  val = pred.guess + palette_entry;  // entry added to the guess
+                } else {
+                  val = palette_entry;  // entry used as the value itself
+                }
+                p[x] = val;
+                wp_state.UpdateErrors(p[x], x, y, channel.w);  // every pixel
+              }
+            }
+          },
+          "UndoDeltaPaletteWP"));
+    } else {
+      JXL_RETURN_IF_ERROR(RunOnPool(
+          pool, 0, nb, ThreadPool::NoInit,
+          [&](const uint32_t c, size_t /* thread */) {
+            Channel &channel = input.channel[c0 + c];
+            for (size_t y = 0; y < channel.h; y++) {
+              pixel_type *JXL_RESTRICT p = channel.Row(y);
+              const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+              for (size_t x = 0; x < channel.w; x++) {
+                int index = idx[x];
+                pixel_type_w val = 0;
+                const pixel_type palette_entry =
+                    palette_internal::GetPaletteValue(
+                        p_palette, index, /*c=*/c,
+                        /*palette_size=*/palette.w,
+                        /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+                if (index < static_cast<int32_t>(nb_deltas)) {  // delta entry
+                  PredictionResult pred = PredictNoTreeNoWP(
+                      channel.w, p + x, onerow_image, x, y, predictor);
+                  val = pred.guess + palette_entry;  // entry added to the guess
+                } else {
+                  val = palette_entry;  // entry used as the value itself
+                }
+                p[x] = val;
+              }
+            }
+          },
+          "UndoDeltaPaletteNoWP"));
+    }
+  }
+  if (c0 >= input.nb_meta_channels) {
+    // Palette was done on normal channels: only the palette meta channel goes.
+    input.nb_meta_channels--;
+  } else {
+    // Palette was done on metachannels: nb - 1 come back, the palette goes.
+    JXL_ASSERT(static_cast<int>(input.nb_meta_channels) >= 2 - nb);
+    input.nb_meta_channels -= 2 - nb;
+    JXL_ASSERT(begin_c + nb - 1 < input.nb_meta_channels);
+  }
+  input.channel.erase(input.channel.begin(), input.channel.begin() + 1);
+  return num_errors.load(std::memory_order_relaxed) == 0;
+}
+
+Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                   uint32_t nb_colors, uint32_t nb_deltas, bool lossy) {
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+  // Only channel bookkeeping happens here; `lossy` is not read in this body.
+  size_t nb = end_c - begin_c + 1;  // number of channels being palettized
+  if (begin_c >= input.nb_meta_channels) {
+    // Palette was done on normal channels
+    input.nb_meta_channels++;
+  } else {
+    // Palette was done on metachannels
+    JXL_ASSERT(end_c < input.nb_meta_channels);
+    // we remove nb-1 metachannels and add one
+    input.nb_meta_channels += 2 - nb;
+  }
+  input.channel.erase(input.channel.begin() + begin_c + 1,  // keeps begin_c
+                      input.channel.begin() + end_c + 1);
+  Channel pch(nb_colors + nb_deltas, nb);  // one column per entry, nb rows
+  pch.hshift = -1;
+  pch.vshift = -1;
+  input.channel.insert(input.channel.begin(), std::move(pch));  // palette at 0
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
new file mode 100644
index 0000000000..cc0f67960b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+
+#include <atomic>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr int kMaxPaletteLookupTableSize = 1 << 16;
+// Implicit palette entries evaluate to 0 for channels at or beyond this count.
+static constexpr int kRgbChannels = 3;
+
+// Side length of the larger (5x5x5) implicit color cube.
+static constexpr int kLargeCube = 5;
+
+// Smaller interleaved color cube to fill the holes of the larger cube.
+static constexpr int kSmallCube = 4;
+static constexpr int kSmallCubeBits = 2;  // log2(kSmallCube)
+// kSmallCube ** 3: small-cube entry count; the large cube starts after it.
+static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube;
+
+static inline pixel_type Scale(uint64_t value, uint64_t bit_depth,
+                               uint64_t denom) {
+  // return (value * ((static_cast<pixel_type_w>(1) << bit_depth) - 1)) / denom;
+  // Callers only pass kSmallCube or kLargeCube - 1 as denom; both equal 4, so
+  // the division above is replaced by a right shift by 2.
+  JXL_ASSERT(denom == 4);
+  return (value * ((static_cast<uint64_t>(1) << bit_depth) - 1)) >> 2;
+}
+
+// The purpose of this function is solely to extend the interpretation of
+// palette indices to implicit values. If index < nb_deltas, indicating that the
+// result is a delta palette entry, it is the responsibility of the caller to
+// treat it as such.
+static JXL_MAYBE_UNUSED pixel_type
+GetPaletteValue(const pixel_type *const palette, int index, const size_t c,
+                const int palette_size, const int onerow, const int bit_depth) {
+  if (index < 0) {  // negative index: entry of the built-in kDeltaPalette
+    static constexpr std::array<std::array<pixel_type, 3>, 72> kDeltaPalette = {
+        {
+            {{0, 0, 0}},       {{4, 4, 4}},       {{11, 0, 0}},
+            {{0, 0, -13}},     {{0, -12, 0}},     {{-10, -10, -10}},
+            {{-18, -18, -18}}, {{-27, -27, -27}}, {{-18, -18, 0}},
+            {{0, 0, -32}},     {{-32, 0, 0}},     {{-37, -37, -37}},
+            {{0, -32, -32}},   {{24, 24, 45}},    {{50, 50, 50}},
+            {{-45, -24, -24}}, {{-24, -45, -45}}, {{0, -24, -24}},
+            {{-34, -34, 0}},   {{-24, 0, -24}},   {{-45, -45, -24}},
+            {{64, 64, 64}},    {{-32, 0, -32}},   {{0, -32, 0}},
+            {{-32, 0, 32}},    {{-24, -45, -24}}, {{45, 24, 45}},
+            {{24, -24, -45}},  {{-45, -24, 24}},  {{80, 80, 80}},
+            {{64, 0, 0}},      {{0, 0, -64}},     {{0, -64, -64}},
+            {{-24, -24, 45}},  {{96, 96, 96}},    {{64, 64, 0}},
+            {{45, -24, -24}},  {{34, -34, 0}},    {{112, 112, 112}},
+            {{24, -45, -45}},  {{45, 45, -24}},   {{0, -32, 32}},
+            {{24, -24, 45}},   {{0, 96, 96}},     {{45, -24, 24}},
+            {{24, -45, -24}},  {{-24, -45, 24}},  {{0, -64, 0}},
+            {{96, 0, 0}},      {{128, 128, 128}}, {{64, 0, 64}},
+            {{144, 144, 144}}, {{96, 96, 0}},     {{-36, -36, 36}},
+            {{45, -24, -45}},  {{45, -45, -24}},  {{0, 0, -96}},
+            {{0, 128, 128}},   {{0, 96, 0}},      {{45, 24, -45}},
+            {{-128, 0, 0}},    {{24, -45, 24}},   {{-45, 24, -45}},
+            {{64, 0, -64}},    {{64, -64, -64}},  {{96, 0, 96}},
+            {{45, -45, 24}},   {{24, 45, -45}},   {{64, 64, -64}},
+            {{128, 128, 0}},   {{0, 0, -128}},    {{-24, 45, -45}},
+        }};
+    if (c >= kRgbChannels) {  // the table only has three components
+      return 0;
+    }
+    // Do not open the brackets, otherwise INT32_MIN negation could overflow.
+    index = -(index + 1);  // maps -1, -2, -3, ... to 0, 1, 2, ...
+    index %= 1 + 2 * (kDeltaPalette.size() - 1);  // 143 distinct values
+    static constexpr int kMultiplier[] = {-1, 1};  // even: negated, odd: as is
+    pixel_type result =
+        kDeltaPalette[((index + 1) >> 1)][c] * kMultiplier[index & 1];
+    if (bit_depth > 8) {  // multiply by 2^(bit_depth - 8)
+      result *= static_cast<pixel_type>(1) << (bit_depth - 8);
+    }
+    return result;
+  } else if (palette_size <= index && index < palette_size + kLargeCubeOffset) {
+    if (c >= kRgbChannels) return 0;
+    index -= palette_size;  // position inside the small 4x4x4 cube
+    index >>= c * kSmallCubeBits;  // two bits of the position per channel
+    return Scale(index % kSmallCube, bit_depth, kSmallCube) +
+           (1 << (std::max(0, bit_depth - 3)));
+  } else if (palette_size + kLargeCubeOffset <= index) {  // large 5x5x5 cube
+    if (c >= kRgbChannels) return 0;
+    index -= palette_size + kLargeCubeOffset;
+    // TODO(eustas): should we take care of ambiguity created by
+    //               index >= kLargeCube ** 3 ?
+    switch (c) {  // base-5 digit selection: channel c uses digit c
+      case 0:
+        break;
+      case 1:
+        index /= kLargeCube;
+        break;
+      case 2:
+        index /= kLargeCube * kLargeCube;
+        break;
+    }
+    return Scale(index % kLargeCube, bit_depth, kLargeCube - 1);
+  }
+  return palette[c * onerow + static_cast<size_t>(index)];  // explicit entry
+}
+
+}  // namespace palette_internal
+
+Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,
+                  uint32_t nb_deltas, Predictor predictor,
+                  const weighted::Header &wp_header, ThreadPool *pool);
+
+Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+                   uint32_t nb_colors, uint32_t nb_deltas, bool lossy);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc
new file mode 100644
index 0000000000..f3002a5ac3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/rct.h"
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+template <int transform_type>
+void InvRCTRow(const pixel_type* in0, const pixel_type* in1,
+               const pixel_type* in2, pixel_type* out0, pixel_type* out1,
+               pixel_type* out2, size_t w) {
+  static_assert(transform_type >= 0 && transform_type < 7,
+                "Invalid transform type");
+  int second = transform_type >> 1;  // 1: add First, 2: add avg(First, Third)
+  int third = transform_type & 1;  // 1: add First to Third
+  // Full vectors first; the scalar loop further down handles leftover pixels.
+  size_t x = 0;
+  const HWY_FULL(pixel_type) d;
+  const size_t N = Lanes(d);
+  for (; x + N - 1 < w; x += N) {
+    if (transform_type == 6) {  // type 6: inputs are read as Y, Co, Cg
+      auto Y = Load(d, in0 + x);
+      auto Co = Load(d, in1 + x);
+      auto Cg = Load(d, in2 + x);
+      Y = Sub(Y, ShiftRight<1>(Cg));
+      auto G = Add(Cg, Y);
+      Y = Sub(Y, ShiftRight<1>(Co));
+      auto R = Add(Y, Co);
+      Store(R, d, out0 + x);
+      Store(G, d, out1 + x);
+      Store(Y, d, out2 + x);  // Y now holds what the scalar path calls B
+    } else {
+      auto First = Load(d, in0 + x);
+      auto Second = Load(d, in1 + x);
+      auto Third = Load(d, in2 + x);
+      if (third) Third = Add(Third, First);  // before the average below uses it
+      if (second == 1) {
+        Second = Add(Second, First);
+      } else if (second == 2) {
+        Second = Add(Second, ShiftRight<1>(Add(First, Third)));
+      }
+      Store(First, d, out0 + x);
+      Store(Second, d, out1 + x);
+      Store(Third, d, out2 + x);
+    }
+  }
+  for (; x < w; x++) {  // scalar tail, same steps expressed with PixelAdd
+    if (transform_type == 6) {
+      pixel_type Y = in0[x];
+      pixel_type Co = in1[x];
+      pixel_type Cg = in2[x];
+      pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
+      pixel_type G = PixelAdd(Cg, tmp);
+      pixel_type B = PixelAdd(tmp, -(Co >> 1));
+      pixel_type R = PixelAdd(B, Co);
+      out0[x] = R;
+      out1[x] = G;
+      out2[x] = B;
+    } else {
+      pixel_type First = in0[x];
+      pixel_type Second = in1[x];
+      pixel_type Third = in2[x];
+      if (third) Third = PixelAdd(Third, First);  // before the average uses it
+      if (second == 1) {
+        Second = PixelAdd(Second, First);
+      } else if (second == 2) {
+        Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
+      }
+      out0[x] = First;
+      out1[x] = Second;
+      out2[x] = Third;
+    }
+  }
+}
+
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
+  JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+  size_t m = begin_c;  // first of the three channels being transformed
+  Channel& c0 = input.channel[m + 0];
+  size_t w = c0.w;
+  size_t h = c0.h;
+  if (rct_type == 0) {  // noop
+    return true;
+  }
+  // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+  int permutation = rct_type / 7;
+  JXL_CHECK(permutation < 6);
+  // 0-5 values have the low bit corresponding to Third and the high bits
+  // corresponding to Second. 6 corresponds to YCoCg.
+  //
+  // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+  //
+  // Third: 0=nop, 1=SubtractFirst
+  int custom = rct_type % 7;
+  // Special case: permute-only. Swap channels around.
+  if (custom == 0) {
+    Channel ch0 = std::move(input.channel[m]);  // move all three out first
+    Channel ch1 = std::move(input.channel[m + 1]);
+    Channel ch2 = std::move(input.channel[m + 2]);
+    input.channel[m + (permutation % 3)] = std::move(ch0);
+    input.channel[m + ((permutation + 1 + permutation / 3) % 3)] =
+        std::move(ch1);
+    input.channel[m + ((permutation + 2 - permutation / 3) % 3)] =
+        std::move(ch2);
+    return true;
+  }
+  constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = {  // indexed by `custom`
+      InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>,
+      InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>};
+  JXL_RETURN_IF_ERROR(RunOnPool(
+      pool, 0, h, ThreadPool::NoInit,
+      [&](const uint32_t task, size_t /* thread */) {
+        const size_t y = task;  // one task per row
+        const pixel_type* in0 = input.channel[m].Row(y);
+        const pixel_type* in1 = input.channel[m + 1].Row(y);
+        const pixel_type* in2 = input.channel[m + 2].Row(y);
+        pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);
+        pixel_type* out1 =  // outputs are rows of the same three channels
+            input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+        pixel_type* out2 =
+            input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+        inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);
+      },
+      "InvRCT"));
+  return true;
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(InvRCT);  // table of the per-target HWY_NAMESPACE::InvRCT builds
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
+  return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool);
+}
+
+}  // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h
new file mode 100644
index 0000000000..aef65621d5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"  // CheckEqualChannels
+
+namespace jxl {
+
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_RCT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc
new file mode 100644
index 0000000000..8440d9e804
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc
@@ -0,0 +1,478 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/squeeze.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::MulEven;
+using hwy::HWY_NAMESPACE::Ne;
+using hwy::HWY_NAMESPACE::Neg;
+using hwy::HWY_NAMESPACE::OddEven;
+using hwy::HWY_NAMESPACE::RebindToUnsigned;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Xor;
+
+#if HWY_TARGET != HWY_SCALAR
+
+JXL_INLINE void FastUnsqueeze(const pixel_type *JXL_RESTRICT p_residual,
+                              const pixel_type *JXL_RESTRICT p_avg,
+                              const pixel_type *JXL_RESTRICT p_navg,
+                              const pixel_type *p_pout,
+                              pixel_type *JXL_RESTRICT p_out,
+                              pixel_type *p_nout) {  // SIMD unsqueeze, 8 values
+  const HWY_CAPPED(pixel_type, 8) d;
+  const RebindToUnsigned<decltype(d)> du;
+  const size_t N = Lanes(d);
+  auto onethird = Set(d, 0x55555556);  // (2^32 + 2) / 3: fixed-point 1/3
+  for (size_t x = 0; x < 8; x += N) {  // 8 values per call, N lanes per step
+    auto avg = Load(d, p_avg + x);
+    auto next_avg = Load(d, p_navg + x);
+    auto top = Load(d, p_pout + x);  // plays the role of SmoothTendency's `B`
+    // Branch-free equivalent of SmoothTendency(top, avg, next_avg).
+    auto Ba = Sub(top, avg);
+    auto an = Sub(avg, next_avg);
+    auto nonmono = Xor(Ba, an);  // sign bit set iff Ba and an differ in sign
+    auto absBa = Abs(Ba);
+    auto absan = Abs(an);
+    auto absBn = Abs(Sub(top, next_avg));
+    // a3 = absBa / 3: even lanes via MulEven, odd lanes via Reverse + MulEven.
+    auto a3e = BitCast(d, ShiftRight<32>(MulEven(absBa, onethird)));
+    auto a3oi = MulEven(Reverse(d, absBa), onethird);
+    auto a3o = BitCast(
+        d, Reverse(hwy::HWY_NAMESPACE::Repartition<pixel_type_w, decltype(d)>(),
+                   a3oi));
+    auto a3 = OddEven(a3o, a3e);  // odd lanes from a3o, even lanes from a3e
+    a3 = Add(a3, Add(absBn, Set(d, 2)));
+    auto absdiff = ShiftRight<2>(a3);  // (absBa / 3 + absBn + 2) >> 2
+    auto skipdiff = Ne(Ba, Zero(d));
+    skipdiff = And(skipdiff, Ne(an, Zero(d)));
+    skipdiff = And(skipdiff, Lt(nonmono, Zero(d)));  // nonzero, opposite signs
+    auto absBa2 = Add(ShiftLeft<1>(absBa), And(absdiff, Set(d, 1)));
+    absdiff = IfThenElse(Gt(absdiff, absBa2),
+                         Add(ShiftLeft<1>(absBa), Set(d, 1)), absdiff);
+    auto absan2 = ShiftLeft<1>(absan);
+    absdiff = IfThenElse(Gt(Add(absdiff, And(absdiff, Set(d, 1))), absan2),
+                         absan2, absdiff);
+    auto diff1 = IfThenElse(Lt(top, next_avg), Neg(absdiff), absdiff);
+    auto tendency = IfThenZeroElse(skipdiff, diff1);  // zero where skipdiff
+
+    auto diff_minus_tendency = Load(d, p_residual + x);
+    auto diff = Add(diff_minus_tendency, tendency);
+    auto out =
+        Add(avg, ShiftRight<1>(
+                     Add(diff, BitCast(d, ShiftRight<31>(BitCast(du, diff))))));
+    Store(out, d, p_out + x);  // avg + diff / 2 (sign-bit add: round to zero)
+    Store(Sub(out, diff), d, p_nout + x);  // second value of each pair
+  }
+}
+
+#endif
+
+Status InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {
+  JXL_ASSERT(c < input.channel.size());   // c: channel holding the averages
+  JXL_ASSERT(rc < input.channel.size());  // rc: channel holding the residuals
+  Channel &chin = input.channel[c];  // replaced by the wider output at the end
+  const Channel &chin_residual = input.channel[rc];
+  // Undoes a horizontal squeeze; MetaApply already established these sizes.
+  JXL_ASSERT(chin.w == DivCeil(chin.w + chin_residual.w, 2));
+  JXL_ASSERT(chin.h == chin_residual.h);
+
+  if (chin_residual.w == 0) {
+    // Short-circuit: output channel has same dimensions as input.
+    input.channel[c].hshift--;
+    return true;
+  }
+
+  // Note: chin.w >= chin_residual.w and at most 1 different.
+  Channel chout(chin.w + chin_residual.w, chin.h, chin.hshift - 1, chin.vshift);
+  JXL_DEBUG_V(4,
+              "Undoing horizontal squeeze of channel %i using residuals in "
+              "channel %i (going from width %" PRIuS " to %" PRIuS ")",
+              c, rc, chin.w, chout.w);
+
+  if (chin_residual.h == 0) {
+    // Short-circuit: channel with no pixels.
+    input.channel[c] = std::move(chout);
+    return true;
+  }
+  auto unsqueeze_row = [&](size_t y, size_t x0) {  // scalar path from column x0
+    const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y);
+    const pixel_type *JXL_RESTRICT p_avg = chin.Row(y);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+    for (size_t x = x0; x < chin_residual.w; x++) {
+      pixel_type_w diff_minus_tendency = p_residual[x];
+      pixel_type_w avg = p_avg[x];
+      pixel_type_w next_avg = (x + 1 < chin.w ? p_avg[x + 1] : avg);
+      pixel_type_w left = (x ? p_out[(x << 1) - 1] : avg);  // prior output
+      pixel_type_w tendency = SmoothTendency(left, avg, next_avg);
+      pixel_type_w diff = diff_minus_tendency + tendency;
+      pixel_type_w A = avg + (diff / 2);
+      p_out[(x << 1)] = A;
+      pixel_type_w B = A - diff;
+      p_out[(x << 1) + 1] = B;
+    }
+    if (chout.w & 1) p_out[chout.w - 1] = p_avg[chin.w - 1];  // unpaired tail
+  };
+
+  // Somewhat complicated trickery just to be able to SIMD this.
+  // Horizontal unsqueeze has horizontal data dependencies, so we do
+  // 8 rows at a time and treat it as a vertical unsqueeze of a
+  // transposed 8x8 block (or 9x8 for one input).
+  static constexpr const size_t kRowsPerThread = 8;
+  const auto unsqueeze_span = [&](const uint32_t task, size_t /* thread */) {
+    const size_t y0 = task * kRowsPerThread;
+    const size_t rows = std::min(kRowsPerThread, chin.h - y0);  // last is short
+    size_t x = 0;  // first column the SIMD path has not produced yet
+
+#if HWY_TARGET != HWY_SCALAR
+    intptr_t onerow_in = chin.plane.PixelsPerRow();
+    intptr_t onerow_inr = chin_residual.plane.PixelsPerRow();
+    intptr_t onerow_out = chout.plane.PixelsPerRow();
+    const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y0);
+    const pixel_type *JXL_RESTRICT p_avg = chin.Row(y0);
+    pixel_type *JXL_RESTRICT p_out = chout.Row(y0);
+    HWY_ALIGN pixel_type b_p_avg[9 * kRowsPerThread];  // 8 columns + lookahead
+    HWY_ALIGN pixel_type b_p_residual[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_even[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_odd[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_evenT[8 * kRowsPerThread];
+    HWY_ALIGN pixel_type b_p_out_oddT[8 * kRowsPerThread];
+    const HWY_CAPPED(pixel_type, 8) d;
+    const size_t N = Lanes(d);
+    if (chin_residual.w > 16 && rows == kRowsPerThread) {  // full 8-row spans
+      for (; x < chin_residual.w - 9; x += 8) {
+        Transpose8x8Block(p_residual + x, b_p_residual, onerow_inr);
+        Transpose8x8Block(p_avg + x, b_p_avg, onerow_in);
+        for (size_t y = 0; y < kRowsPerThread; y++) {
+          b_p_avg[8 * 8 + y] = p_avg[x + 8 + onerow_in * y];  // 9th column
+        }
+        for (size_t i = 0; i < 8; i++) {  // block columns, left to right
+          FastUnsqueeze(
+              b_p_residual + 8 * i, b_p_avg + 8 * i, b_p_avg + 8 * (i + 1),
+              (x + i ? b_p_out_odd + 8 * ((x + i - 1) & 7) : b_p_avg + 8 * i),
+              b_p_out_even + 8 * i, b_p_out_odd + 8 * i);
+        }
+
+        Transpose8x8Block(b_p_out_even, b_p_out_evenT, 8);
+        Transpose8x8Block(b_p_out_odd, b_p_out_oddT, 8);
+        for (size_t y = 0; y < kRowsPerThread; y++) {
+          for (size_t i = 0; i < kRowsPerThread; i += N) {
+            auto even = Load(d, b_p_out_evenT + 8 * y + i);
+            auto odd = Load(d, b_p_out_oddT + 8 * y + i);
+            StoreInterleaved(d, even, odd,
+                             p_out + ((x + i) << 1) + onerow_out * y);
+          }
+        }
+      }
+    }
+#endif
+    for (size_t y = 0; y < rows; y++) {  // scalar remainder, starting at x
+      unsqueeze_row(y0 + y, x);
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.h, kRowsPerThread),
+                                ThreadPool::NoInit, unsqueeze_span,
+                                "InvHorizontalSqueeze"));
+  input.channel[c] = std::move(chout);
+  return true;
+}
+
+Status InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {
+  JXL_ASSERT(c < input.channel.size());   // c: channel holding the averages
+  JXL_ASSERT(rc < input.channel.size());  // rc: channel holding the residuals
+  const Channel &chin = input.channel[c];
+  const Channel &chin_residual = input.channel[rc];
+  // Undoes a vertical squeeze; MetaApply already established these sizes.
+  JXL_ASSERT(chin.h == DivCeil(chin.h + chin_residual.h, 2));
+  JXL_ASSERT(chin.w == chin_residual.w);
+
+  if (chin_residual.h == 0) {
+    // Short-circuit: output channel has same dimensions as input.
+    input.channel[c].vshift--;
+    return true;
+  }
+
+  // Note: chin.h >= chin_residual.h and at most 1 different.
+  Channel chout(chin.w, chin.h + chin_residual.h, chin.hshift, chin.vshift - 1);
+  JXL_DEBUG_V(
+      4,
+      "Undoing vertical squeeze of channel %i using residuals in channel "
+      "%i (going from height %" PRIuS " to %" PRIuS ")",
+      c, rc, chin.h, chout.h);
+
+  if (chin_residual.w == 0) {
+    // Short-circuit: channel with no pixels.
+    input.channel[c] = std::move(chout);
+    return true;
+  }
+
+  static constexpr const int kColsPerThread = 64;  // columns per pool task
+  const auto unsqueeze_slice = [&](const uint32_t task, size_t /* thread */) {
+    const size_t x0 = task * kColsPerThread;
+    const size_t x1 = std::min((size_t)(task + 1) * kColsPerThread, chin.w);
+    const size_t w = x1 - x0;  // width of this task's column slice
+    // We only iterate up to std::min(chin_residual.h, chin.h) which is
+    // always chin_residual.h.
+    for (size_t y = 0; y < chin_residual.h; y++) {
+      const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y) + x0;
+      const pixel_type *JXL_RESTRICT p_avg = chin.Row(y) + x0;
+      const pixel_type *JXL_RESTRICT p_navg =
+          chin.Row(y + 1 < chin.h ? y + 1 : y) + x0;  // last row reuses itself
+      pixel_type *JXL_RESTRICT p_out = chout.Row(y << 1) + x0;
+      pixel_type *JXL_RESTRICT p_nout = chout.Row((y << 1) + 1) + x0;
+      const pixel_type *p_pout = y > 0 ? chout.Row((y << 1) - 1) + x0 : p_avg;
+      size_t x = 0;
+#if HWY_TARGET != HWY_SCALAR
+      for (; x + 7 < w; x += 8) {  // SIMD: 8 columns per call
+        FastUnsqueeze(p_residual + x, p_avg + x, p_navg + x, p_pout + x,
+                      p_out + x, p_nout + x);
+      }
+#endif
+      for (; x < w; x++) {  // scalar tail (every column on HWY_SCALAR)
+        pixel_type_w avg = p_avg[x];
+        pixel_type_w next_avg = p_navg[x];
+        pixel_type_w top = p_pout[x];
+        pixel_type_w tendency = SmoothTendency(top, avg, next_avg);
+        pixel_type_w diff_minus_tendency = p_residual[x];
+        pixel_type_w diff = diff_minus_tendency + tendency;
+        pixel_type_w out = avg + (diff / 2);
+        p_out[x] = out;
+        // If the chin_residual.h == chin.h, the output has an even number
+        // of rows so the next line is fine. Otherwise, this loop won't
+        // write to the last output row which is handled separately.
+        p_nout[x] = out - diff;
+      }
+    }
+  };
+  JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.w, kColsPerThread),
+                                ThreadPool::NoInit, unsqueeze_slice,
+                                "InvVertSqueeze"));
+
+  if (chout.h & 1) {  // odd output height: last row is a plain copy of avg
+    size_t y = chin.h - 1;
+    const pixel_type *p_avg = chin.Row(y);
+    pixel_type *p_out = chout.Row(y << 1);
+    for (size_t x = 0; x < chin.w; x++) {
+      p_out[x] = p_avg[x];
+    }
+  }
+  input.channel[c] = std::move(chout);
+  return true;
+}
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool) {
+  // Reverses every squeeze step recorded in `parameters`, merging each residual
+  // channel back into its average channel. Steps are undone last-to-first,
+  // the reverse of the index order in which `parameters` is stored.
+  for (auto it = parameters.rbegin(); it != parameters.rend(); ++it) {
+    const SqueezeParams &step = *it;
+    JXL_RETURN_IF_ERROR(CheckMetaSqueezeParams(step, input.channel.size()));
+    const uint32_t beginc = step.begin_c;
+    const uint32_t endc = step.begin_c + step.num_c - 1;
+    const uint32_t count = endc - beginc + 1;
+    // Residuals sit right after the squeezed range (in-place) or occupy the
+    // last `count` channels of the image.
+    const uint32_t offset = step.in_place
+                                ? endc + 1
+                                : input.channel.size() + beginc - endc - 1;
+    if (beginc < input.nb_meta_channels) {
+      // This is checked in MetaSqueeze.
+      JXL_ASSERT(input.nb_meta_channels > step.num_c);
+      input.nb_meta_channels -= step.num_c;
+    }
+
+    for (uint32_t c = beginc; c <= endc; ++c) {
+      const uint32_t rc = offset + c - beginc;
+      // MetaApply should imply that `rc` is within range, otherwise there's a
+      // programming bug.
+      JXL_ASSERT(rc < input.channel.size());
+      const Channel &avg = input.channel[c];
+      const Channel &residual = input.channel[rc];
+      if (avg.w < residual.w || avg.h < residual.h) {
+        return JXL_FAILURE("Corrupted squeeze transform");
+      }
+      JXL_RETURN_IF_ERROR(step.horizontal ? InvHSqueeze(input, c, rc, pool)
+                                          : InvVSqueeze(input, c, rc, pool));
+    }
+    // Drop the residual channels that were just consumed.
+    const auto first_residual = input.channel.begin() + offset;
+    input.channel.erase(first_residual, first_residual + count);
+  }
+  return true;
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+
+HWY_EXPORT(InvSqueeze);  // Per-target table consumed by the dispatch below.
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool) {
+  return HWY_DYNAMIC_DISPATCH(InvSqueeze)(input, parameters, pool);
+}  // Note: `parameters` is taken by value and copied again for the callee.
+
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+                              const Image &image) {  // default squeeze schedule
+  int nb_channels = image.channel.size() - image.nb_meta_channels;
+  // NOTE(review): assumes at least one non-meta channel exists -- confirm.
+  parameters->clear();  // *parameters is rebuilt from scratch
+  size_t w = image.channel[image.nb_meta_channels].w;
+  size_t h = image.channel[image.nb_meta_channels].h;
+  JXL_DEBUG_V(
+      7, "Default squeeze parameters for %" PRIuS "x%" PRIuS " image: ", w, h);
+
+  // Do horizontal first on wide images; vertical first on tall images.
+  bool wide = (w > h);
+
+  if (nb_channels > 2 && image.channel[image.nb_meta_channels + 1].w == w &&
+      image.channel[image.nb_meta_channels + 1].h == h) {
+    // Assume channels 1 and 2 are chroma and squeeze them first for 4:2:0
+    // previews. NOTE(review): only channel 1's size is compared here.
+    JXL_DEBUG_V(7, "(4:2:0 chroma), %" PRIuS "x%" PRIuS " image", w, h);
+    SqueezeParams params;
+    // horizontal chroma squeeze
+    params.horizontal = true;
+    params.in_place = false;
+    params.begin_c = image.nb_meta_channels + 1;
+    params.num_c = 2;
+    parameters->push_back(params);
+    params.horizontal = false;
+    // vertical chroma squeeze
+    parameters->push_back(params);
+  }
+  SqueezeParams params;  // template for the squeezes over all non-meta channels
+  params.begin_c = image.nb_meta_channels;
+  params.num_c = nb_channels;
+  params.in_place = true;
+
+  if (!wide) {  // not wider than tall: start with one vertical squeeze
+    if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = false;
+      parameters->push_back(params);
+      h = (h + 1) / 2;
+      JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h);
+    }
+  }
+  while (w > JXL_MAX_FIRST_PREVIEW_SIZE || h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+    if (w > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = true;
+      parameters->push_back(params);
+      w = (w + 1) / 2;  // tracks the width left in the averages
+      JXL_DEBUG_V(7, "Horizontal (%" PRIuS "x%" PRIuS "), ", w, h);
+    }
+    if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+      params.horizontal = false;
+      parameters->push_back(params);
+      h = (h + 1) / 2;  // tracks the height left in the averages
+      JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h);
+    }
+  }
+  JXL_DEBUG_V(7, "that's it");
+}
+
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter,
+                              int num_channels) {
+  // Inclusive channel range [first, last] covered by this squeeze step.
+  const int first = parameter.begin_c;
+  const int last = parameter.begin_c + parameter.num_c - 1;
+  const bool in_bounds = 0 <= first && first <= last && last < num_channels;
+  if (!in_bounds) return JXL_FAILURE("Invalid channel range");
+  return true;
+}
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters) {
+  // Applies the squeeze steps to the channel layout of `image`: each squeezed
+  // channel keeps ceil(n / 2) columns (or rows) and a new residual channel
+  // with the remaining ones is inserted. Fails on invalid ranges, on ranges
+  // mixing meta and non-meta channels, on shifts above 30 and on empty channels.
+  // An empty list means "use the defaults", which are written to *parameters.
+  if (parameters->empty()) DefaultSqueezeParameters(parameters, image);
+
+  for (const SqueezeParams &step : *parameters) {
+    JXL_RETURN_IF_ERROR(CheckMetaSqueezeParams(step, image.channel.size()));
+    const uint32_t beginc = step.begin_c;
+    const uint32_t endc = step.begin_c + step.num_c - 1;
+
+    if (beginc < image.nb_meta_channels) {
+      if (endc >= image.nb_meta_channels) {
+        return JXL_FAILURE("Invalid squeeze: mix of meta and nonmeta channels");
+      }
+      if (!step.in_place) {
+        return JXL_FAILURE(
+            "Invalid squeeze: meta channels require in-place residuals");
+      }
+      image.nb_meta_channels += step.num_c;
+    }
+    // Residuals go right after the squeezed range (in-place) or at the end of
+    // the channel list; the position is fixed before any channel is inserted.
+    const uint32_t offset = step.in_place ? endc + 1 : image.channel.size();
+    for (uint32_t c = beginc; c <= endc; ++c) {
+      Channel &ch = image.channel[c];
+      if (ch.hshift > 30 || ch.vshift > 30) {
+        return JXL_FAILURE("Too many squeezes: shift > 30");
+      }
+      if (ch.w == 0 || ch.h == 0) return JXL_FAILURE("Squeezing empty channel");
+      // The averages keep ceil(n / 2) samples; the residuals get the rest.
+      size_t residual_w = ch.w;
+      size_t residual_h = ch.h;
+      if (step.horizontal) {
+        residual_w -= (ch.w + 1) / 2;
+        ch.w = (ch.w + 1) / 2;
+        if (ch.hshift >= 0) ch.hshift++;
+      } else {
+        residual_h -= (ch.h + 1) / 2;
+        ch.h = (ch.h + 1) / 2;
+        if (ch.vshift >= 0) ch.vshift++;
+      }
+      ch.shrink();
+      Channel dummy(residual_w, residual_h);
+      dummy.hshift = ch.hshift;
+      dummy.vshift = ch.vshift;
+
+      // `ch` must not be used past this point: insert() may reallocate.
+      image.channel.insert(image.channel.begin() + offset + (c - beginc),
+                           std::move(dummy));
+      JXL_DEBUG_V(8, "MetaSqueeze applied, current image: %s",
+                  image.DebugString().c_str());
+    }
+  }
+  return true;
+}
+
+}  // namespace jxl
+
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h
new file mode 100644
index 0000000000..fb18710a6f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+
+// Haar-like transform: halves the resolution in one direction
+// A B   -> (A+B)>>1        in one channel (the average), which has the same
+//                          range as the original channel
+//          A-B - tendency  in a new channel (the 'residual', needed to make
+//                          the transform reversible); in theory its range
+//                          could be 2.5 times larger than the original's
+//                          (2 times without the 'tendency'), but there
+//                          should be lots of zeroes
+//
+// Repeated application (alternating horizontal and vertical squeezes) results
+// in downscaling
+//
+// The default coefficient ordering is low-frequency to high-frequency, as in
+// M. Antonini, M. Barlaud, P. Mathieu and I. Daubechies, "Image coding using
+// wavelet transform", IEEE Transactions on Image Processing, vol. 1, no. 2, pp.
+// 205-220, April 1992, doi: 10.1109/83.136597.
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+#define JXL_MAX_FIRST_PREVIEW_SIZE 8
+
+namespace jxl {
+
+/*
+        int avg=(A+B)>>1;
+        int diff=(A-B);
+        int rA=(diff+(avg<<1)+(diff&1))>>1;
+        int rB=rA-diff;
+
+*/
+//         |A B|C D|E F|
+//           p   a   n             p=avg(A,B), a=avg(C,D), n=avg(E,F)
+//
+// Goal: estimate C-D (avoiding ringing artifacts)
+// (ensuring that in smooth areas, a zero residual corresponds to a smooth
+// gradient)
+
+// best estimate for C: (B + 2*a)/3
+// best estimate for D: (n + 3*a)/4
+// best estimate for C-D:  (4*B - 3*n - a) / 12
+
+// avoid ringing by 1) only doing this if B <= a <= n  or  B >= a >= n
+// (otherwise, this is not a smooth area and we cannot really estimate C-D)
+//                  2) making sure that B <= C <= D <= n  or B >= C >= D >= n
+
+inline pixel_type_w SmoothTendency(pixel_type_w B, pixel_type_w a,
+                                   pixel_type_w n) {
+  const bool falling = (B >= a && a >= n);
+  if (!falling && !(B <= a && a <= n)) return 0;  // not monotonic: no tendency
+  pixel_type_w t;
+  if (falling) {
+    // Clamp so that 2C <= 2B and 2D >= 2n, i.e. B >= C >= D >= n.
+    t = (4 * B - 3 * n - a + 6) / 12;
+    if (t - (t & 1) > 2 * (B - a)) t = 2 * (B - a) + 1;
+    if (t + (t & 1) > 2 * (a - n)) t = 2 * (a - n);
+  } else {
+    // Clamp so that 2C >= 2B and 2D <= 2n, i.e. B <= C <= D <= n.
+    t = (4 * B - 3 * n - a - 6) / 12;
+    if (t + (t & 1) < 2 * (B - a)) t = 2 * (B - a) - 1;
+    if (t - (t & 1) < 2 * (a - n)) t = 2 * (a - n);
+  }
+  return t;
+}
+
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+                              const Image &image);
+
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter, int num_channels);
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters);
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+                  ThreadPool *pool);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc
new file mode 100644
index 0000000000..d9f2b435bf
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/transform.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/palette.h"
+#include "lib/jxl/modular/transform/rct.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+
+namespace jxl {
+
+SqueezeParams::SqueezeParams() { Bundle::Init(this); }  // NOTE(review): presumably loads the VisitFields defaults -- confirm
+Transform::Transform(TransformId id) {
+  Bundle::Init(this);  // NOTE(review): presumably loads VisitFields defaults
+  this->id = id;  // assigned after Init, so it overrides whatever Init stored
+}
+
+Status Transform::Inverse(Image &input, const weighted::Header &wp_header,
+                          ThreadPool *pool) {
+  // Undoes this transform on `input` by delegating to the routine for `id`.
+  JXL_DEBUG_V(6, "Input channels (%" PRIuS ", %" PRIuS " meta): ",
+              input.channel.size(), input.nb_meta_channels);
+  if (id == TransformId::kRCT) return InvRCT(input, begin_c, rct_type, pool);
+  if (id == TransformId::kSqueeze) {
+    return InvSqueeze(input, squeezes, pool);
+  }
+  if (id == TransformId::kPalette) {
+    return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor,
+                      wp_header, pool);
+  }
+  // kInvalid, or any value outside the enum.
+  return JXL_FAILURE("Unknown transformation (ID=%u)",
+                     static_cast<unsigned int>(id));
+}
+
+Status Transform::MetaApply(Image &input) {  // per-id validation / layout step
+  JXL_DEBUG_V(6, "MetaApply input: %s", input.DebugString().c_str());
+  switch (id) {
+    case TransformId::kRCT:
+      JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type);
+      return CheckEqualChannels(input, begin_c, begin_c + 2);  // 3 channels
+    case TransformId::kSqueeze:
+      JXL_DEBUG_V(2, "Transform: kSqueeze:");
+#if JXL_DEBUG_V_LEVEL >= 2
+      {
+        auto squeezes_copy = squeezes;  // copy: logging must not edit squeezes
+        if (squeezes_copy.empty()) {
+          DefaultSqueezeParameters(&squeezes_copy, input);
+        }
+        for (const auto &params : squeezes_copy) {
+          JXL_DEBUG_V(
+              2,
+              "  squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32
+              ", num_c=%" PRIu32,
+              params.horizontal, params.in_place, params.begin_c, params.num_c);
+        }
+      }
+#endif
+      return MetaSqueeze(input, &squeezes);  // fills in defaults when empty
+    case TransformId::kPalette:
+      JXL_DEBUG_V(2,
+                  "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32
+                  ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32,
+                  begin_c, num_c, nb_colors, nb_deltas);
+      return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors,
+                         nb_deltas, lossy_palette);  // 3rd arg: last channel
+    default:
+      return JXL_FAILURE("Unknown transformation (ID=%u)",
+                         static_cast<unsigned int>(id));
+  }
+}
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) {
+  // Fails with a message on a bad [c1, c2] range or a meta/non-meta mix;
+  // returns a silent `false` when a channel differs from c1 in size or shift.
+  if (c1 > image.channel.size() || c2 >= image.channel.size() || c2 < c1) {
+    return JXL_FAILURE("Invalid channel range: %u..%u (there are only %" PRIuS
+                       " channels)",
+                       c1, c2, image.channel.size());
+  }
+  if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) {
+    return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta");
+  }
+  const auto &ref = image.channel[c1];
+  for (size_t c = c1 + 1; c <= c2; c++) {
+    const auto &ch = image.channel[c];
+    if (ref.w != ch.w || ref.h != ch.h) return false;
+    if (ref.hshift != ch.hshift || ref.vshift != ch.vshift) return false;
+  }
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h
new file mode 100644
index 0000000000..d5d3259f7a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+enum class TransformId : uint32_t {
+  // G, R-G, B-G and variants (including YCoCg).
+  kRCT = 0,
+
+  // Color palette over num_c channels starting at begin_c, with nb_colors.
+  kPalette = 1,
+
+  // Squeezing (Haar-style)
+  kSqueeze = 2,
+
+  // Invalid for now; Transform::VisitFields fails when it sees this id.
+  kInvalid = 3,
+};
+
+struct SqueezeParams : public Fields {
+  JXL_FIELDS_NAME(SqueezeParams)
+  bool horizontal;   // true: squeeze along x (halves width); false: along y
+  bool in_place;     // true: residuals go right after the squeezed channels
+  uint32_t begin_c;  // first channel to squeeze
+  uint32_t num_c;    // number of consecutive channels to squeeze
+  SqueezeParams();
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &horizontal));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &in_place));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(3), BitsOffset(6, 8),
+                                           BitsOffset(10, 72),
+                                           BitsOffset(13, 1096), 0, &begin_c));
+    JXL_QUIET_RETURN_IF_ERROR(
+        visitor->U32(Val(1), Val(2), Val(3), BitsOffset(4, 4), 2, &num_c));
+    return true;  // all four fields visited, in declaration order
+  }
+};
+
+class Transform : public Fields {
+ public:
+  TransformId id;  // which transform this is; selects the fields visited below
+  // First channel the transform applies to; serialized for Palette and RCT.
+  uint32_t begin_c;
+  // for RCT. 42 possible values starting from 0 (VisitFields rejects >= 42).
+  uint32_t rct_type;
+  // Channel count. Only for Palette and NearLossless.
+  uint32_t num_c;
+  // Only for Palette.
+  uint32_t nb_colors;
+  uint32_t nb_deltas;
+  // for Squeeze, applied in index order. Default squeeze if empty.
+  std::vector<SqueezeParams> squeezes;
+  // for NearLossless, not serialized.
+  int max_delta_error;
+  // Serialized for Palette.
+  Predictor predictor;
+  // for Palette, not serialized.
+  bool ordered_palette = true;
+  bool lossy_palette = false;
+
+  explicit Transform(TransformId id);
+  // default constructor for bundles.
+  Transform() : Transform(TransformId::kInvalid) {}
+
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+        Val((uint32_t)TransformId::kRCT), Val((uint32_t)TransformId::kPalette),
+        Val((uint32_t)TransformId::kSqueeze),
+        Val((uint32_t)TransformId::kInvalid), (uint32_t)TransformId::kRCT,
+        reinterpret_cast<uint32_t *>(&id)));
+    if (id == TransformId::kInvalid) {
+      return JXL_FAILURE("Invalid transform ID");
+    }
+    if (visitor->Conditional(id == TransformId::kRCT ||
+                             id == TransformId::kPalette)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Bits(3), BitsOffset(6, 8), BitsOffset(10, 72),
+                       BitsOffset(13, 1096), 0, &begin_c));
+    }
+    if (visitor->Conditional(id == TransformId::kRCT)) {
+      // 0-41, default YCoCg.
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(6), Bits(2), BitsOffset(4, 2),
+                                             BitsOffset(6, 10), 6, &rct_type));
+      if (rct_type >= 42) {
+        return JXL_FAILURE("Invalid transform RCT type");
+      }
+    }
+    if (visitor->Conditional(id == TransformId::kPalette)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(1), Val(3), Val(4), BitsOffset(13, 1), 3, &num_c));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+          BitsOffset(8, 0), BitsOffset(10, 256), BitsOffset(12, 1280),
+          BitsOffset(16, 5376), 256, &nb_colors));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), BitsOffset(8, 1), BitsOffset(10, 257),
+                       BitsOffset(16, 1281), 0, &nb_deltas));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->Bits(4, (uint32_t)Predictor::Zero,
+                        reinterpret_cast<uint32_t *>(&predictor)));
+      if (predictor >= Predictor::Best) {
+        return JXL_FAILURE("Invalid predictor");
+      }
+    }
+
+    if (visitor->Conditional(id == TransformId::kSqueeze)) {
+      uint32_t num_squeezes = static_cast<uint32_t>(squeezes.size());
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->U32(Val(0), BitsOffset(4, 1), BitsOffset(6, 9),
+                       BitsOffset(8, 41), 0, &num_squeezes));
+      if (visitor->IsReading()) squeezes.resize(num_squeezes);  // make room
+      for (size_t i = 0; i < num_squeezes; i++) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&squeezes[i]));
+      }
+    }
+    return true;
+  }
+
+  JXL_FIELDS_NAME(Transform)
+
+  Status Inverse(Image &input, const weighted::Header &wp_header,
+                 ThreadPool *pool = nullptr);
+  Status MetaApply(Image &input);
+};
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2);
+
+static inline pixel_type PixelAdd(pixel_type a, pixel_type b) {
+  const uint32_t wrapped = static_cast<uint32_t>(a) + static_cast<uint32_t>(b);
+  return static_cast<pixel_type>(wrapped);  // unsigned add: wraps on overflow
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
-- 
cgit v1.2.3