summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc')
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc813
1 files changed, 813 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc b/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc
new file mode 100644
index 0000000000..157e18c3a8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc
@@ -0,0 +1,813 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_dot_dictionary.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+// Reference-frame slot used for the patch frame produced in this file: it is
+// stored as `ref` of every generated PatchReferencePosition and passed as the
+// slot index to RoundtripPatchFrame().
+static constexpr size_t kPatchFrameReferenceId = 3;
+
+// static
+// Serializes `pdic` to `writer` as one entropy-coded token stream.
+//
+// Positions are consumed in runs of consecutive entries that share the same
+// `ref_pos_idx`. The stream holds the number of runs, then per run: the
+// reference frame index, the patch origin (x0, y0) and size minus one inside
+// that frame, the run length minus one, and per position its coordinates
+// (absolute for the first of the run, PackSigned deltas to the previous one
+// afterwards) followed by `num_extra_channels + 1` blending records.
+//
+// `pdic` must hold at least one patch (asserted). `layer` and `aux_out` are
+// forwarded unchanged to the histogram builder and the token writer.
+void PatchDictionaryEncoder::Encode(const PatchDictionary& pdic,
+                                    BitWriter* writer, size_t layer,
+                                    AuxOut* aux_out) {
+  JXL_ASSERT(pdic.HasAny());
+  std::vector<std::vector<Token>> tokens(1);
+  const size_t num_ec = pdic.shared_->metadata->m.num_extra_channels;
+  const size_t num_positions = pdic.positions_.size();
+
+  auto add_num = [&](int context, size_t num) {
+    tokens[0].emplace_back(context, num);
+  };
+  // Returns the index one past the last position of the run that starts at
+  // `begin`; `begin` must be a valid index.
+  auto run_end = [&](size_t begin) {
+    const size_t ref_pos_idx = pdic.positions_[begin].ref_pos_idx;
+    size_t end = begin;
+    while (end < num_positions &&
+           pdic.positions_[end].ref_pos_idx == ref_pos_idx) {
+      end++;
+    }
+    return end;
+  };
+
+  // First pass: the run count has to be written before any run.
+  size_t num_ref_patch = 0;
+  for (size_t begin = 0; begin < num_positions; begin = run_end(begin)) {
+    num_ref_patch++;
+  }
+  add_num(kNumRefPatchContext, num_ref_patch);
+
+  // Second pass: emit every run. `blend_pos` walks `blendings_` linearly, in
+  // step with the positions.
+  size_t blend_pos = 0;
+  for (size_t begin = 0; begin < num_positions;) {
+    const size_t end = run_end(begin);
+    const auto& ref_pos =
+        pdic.ref_positions_[pdic.positions_[begin].ref_pos_idx];
+    const size_t num = end - begin;
+    JXL_ASSERT(num > 0);
+    add_num(kReferenceFrameContext, ref_pos.ref);
+    add_num(kPatchReferencePositionContext, ref_pos.x0);
+    add_num(kPatchReferencePositionContext, ref_pos.y0);
+    add_num(kPatchSizeContext, ref_pos.xsize - 1);
+    add_num(kPatchSizeContext, ref_pos.ysize - 1);
+    add_num(kPatchCountContext, num - 1);
+    for (size_t j = begin; j < end; j++) {
+      const PatchPosition& pos = pdic.positions_[j];
+      if (j == begin) {
+        add_num(kPatchPositionContext, pos.x);
+        add_num(kPatchPositionContext, pos.y);
+      } else {
+        add_num(kPatchOffsetContext,
+                PackSigned(pos.x - pdic.positions_[j - 1].x));
+        add_num(kPatchOffsetContext,
+                PackSigned(pos.y - pdic.positions_[j - 1].y));
+      }
+      // Distinct index name: the original reused `j` here, shadowing the
+      // position index of the enclosing loop.
+      for (size_t ch = 0; ch < num_ec + 1; ++ch, ++blend_pos) {
+        const PatchBlending& info = pdic.blendings_[blend_pos];
+        add_num(kPatchBlendModeContext, static_cast<uint32_t>(info.mode));
+        if (UsesAlpha(info.mode) &&
+            pdic.shared_->metadata->m.extra_channel_info.size() > 1) {
+          add_num(kPatchAlphaChannelContext, info.alpha_channel);
+        }
+        if (UsesClamp(info.mode)) {
+          add_num(kPatchClampContext, info.clamp);
+        }
+      }
+    }
+    begin = end;
+  }
+
+  EntropyEncodingData codes;
+  std::vector<uint8_t> context_map;
+  BuildAndEncodeHistograms(HistogramParams(), kNumPatchDictionaryContexts,
+                           tokens, &codes, &context_map, writer, layer,
+                           aux_out);
+  WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+// static
+// Removes, in place, the contribution of the patches in `pdic` from the three
+// planes of `opsin`.
+//
+// For every image row and every patch reported by GetPatchesForRow() for that
+// row, the covered pixels are handled according to the blend mode of the
+// first blending record of that position (index pos_idx * (num_ec + 1)):
+//   kAdd     - the matching reference-frame pixels are subtracted,
+//   kReplace - the pixels are set to zero,
+//   kNone    - the pixels are left untouched,
+//   other    - the process aborts (not implemented).
+void PatchDictionaryEncoder::SubtractFrom(const PatchDictionary& pdic,
+                                          Image3F* opsin) {
+  const size_t num_ec = pdic.shared_->metadata->m.num_extra_channels;
+  // TODO(veluca): this can likely be optimized knowing it runs on full images.
+  for (size_t y = 0; y < opsin->ysize(); y++) {
+    float* JXL_RESTRICT rows[3] = {
+        opsin->PlaneRow(0, y),
+        opsin->PlaneRow(1, y),
+        opsin->PlaneRow(2, y),
+    };
+    for (size_t pos_idx : pdic.GetPatchesForRow(y)) {
+      const size_t blending_idx = pos_idx * (num_ec + 1);
+      const PatchPosition& pos = pdic.positions_[pos_idx];
+      const PatchReferencePosition& ref_pos =
+          pdic.ref_positions_[pos.ref_pos_idx];
+      const PatchBlendMode mode = pdic.blendings_[blending_idx].mode;
+      const size_t by = pos.y;
+      const size_t bx = pos.x;
+      const size_t xsize = ref_pos.xsize;
+      JXL_DASSERT(y >= by);
+      JXL_DASSERT(y < by + ref_pos.ysize);
+
+      // The mode is constant for the whole patch row, so dispatch once here
+      // instead of once per pixel and channel.
+      if (mode == PatchBlendMode::kNone) {
+        continue;  // Nothing to do.
+      }
+      if (mode == PatchBlendMode::kReplace) {
+        for (size_t c = 0; c < 3; c++) {
+          for (size_t ix = 0; ix < xsize; ix++) {
+            rows[c][bx + ix] = 0;
+          }
+        }
+        continue;
+      }
+      if (mode != PatchBlendMode::kAdd) {
+        JXL_ABORT("Blending mode %u not yet implemented",
+                  static_cast<uint32_t>(mode));
+      }
+
+      // kAdd: subtract row `iy` of the patch as stored in the reference frame.
+      const size_t iy = y - by;
+      const auto& ref_color =
+          pdic.shared_->reference_frames[ref_pos.ref].frame.color();
+      for (size_t c = 0; c < 3; c++) {
+        const float* JXL_RESTRICT ref_row =
+            ref_color.ConstPlaneRow(c, ref_pos.y0 + iy) + ref_pos.x0;
+        for (size_t ix = 0; ix < xsize; ix++) {
+          rows[c][bx + ix] -= ref_row[ix];
+        }
+      }
+    }
+  }
+}
+
+namespace {
+
+// Per-channel constants used when comparing and quantizing patch pixels.
+// Two fixed parameter sets exist, selected at construction by `is_xyb`.
+struct PatchColorspaceInfo {
+  // Divisor applied to a channel value before truncation in Quantize().
+  float kChannelDequant[3];
+  // Weight of each channel in the distance computed by is_similar_v().
+  float kChannelWeights[3];
+
+  explicit PatchColorspaceInfo(bool is_xyb) {
+    if (is_xyb) {
+      kChannelDequant[0] = 0.01615;
+      kChannelDequant[1] = 0.08875;
+      kChannelDequant[2] = 0.1922;
+      kChannelWeights[0] = 30.0;
+      kChannelWeights[1] = 3.0;
+      kChannelWeights[2] = 1.0;
+    } else {
+      kChannelDequant[0] = 20.0f / 255;
+      kChannelDequant[1] = 22.0f / 255;
+      kChannelDequant[2] = 20.0f / 255;
+      kChannelWeights[0] = 0.017 * 255;
+      kChannelWeights[1] = 0.02 * 255;
+      kChannelWeights[2] = 0.017 * 255;
+    }
+  }
+
+  // Returns `val` of channel `c` expressed in quantization steps.
+  float ScaleForQuantization(float val, size_t c) const {
+    return val / kChannelDequant[c];
+  }
+
+  // Returns `val` of channel `c` in quantization steps, truncated toward
+  // zero.
+  int Quantize(float val, size_t c) const {
+    return static_cast<int>(std::trunc(ScaleForQuantization(val, c)));
+  }
+
+  // Returns true when the weighted sum of absolute per-channel differences
+  // between `v1` and `v2` does not exceed `threshold`.
+  bool is_similar_v(const float v1[3], const float v2[3],
+                    float threshold) const {
+    float distance = 0;
+    for (size_t c = 0; c < 3; c++) {
+      distance += std::fabs(v1[c] - v2[c]) * kChannelWeights[c];
+    }
+    return distance <= threshold;
+  }
+};
+
+// Searches `opsin` for small connected regions that differ from a locally
+// uniform background and that occur more than once, and returns them as
+// PatchInfo entries (a quantized patch paired with the top-left coordinates
+// of each occurrence).
+//
+// Stages, in order: (1) mark kPatchSide-aligned blocks of constant colour
+// whose neighbourhood is mostly the same colour ("screenshot-like");
+// (2) grow a background mask outwards from those blocks; (3) collect
+// connected components of non-background pixels, keep those passing the
+// heuristics below, and store each as the difference from its background
+// colour; (4) merge identical patches and drop those with fewer than
+// kMinPatchOccurrences occurrences.
+//
+// Returns an empty vector when patches are switched off in `state->cparams`,
+// when ApplyOverride() declines based on whether stage (1) found anything,
+// when no component survives, or when the largest surviving patch covers
+// fewer than kMinMaxPatchSize pixels. When WantDebugOutput(aux_out) is true,
+// intermediate images are dumped through `aux_out`. `is_xyb` selects the
+// constant set of PatchColorspaceInfo.
+std::vector<PatchInfo> FindTextLikePatches(
+ const Image3F& opsin, const PassesEncoderState* JXL_RESTRICT state,
+ ThreadPool* pool, AuxOut* aux_out, bool is_xyb) {
+ if (state->cparams.patches == Override::kOff) return {};
+
+ PatchColorspaceInfo pci(is_xyb);
+ float kSimilarThreshold = 0.8f;
+
+ // Compares the pixels at (x, y) pairs `p1` and `p2` of the planes `rows`
+ // (addressed as y * stride + x) using the weighted distance of `pci`.
+ auto is_similar_impl = [&pci](std::pair<uint32_t, uint32_t> p1,
+ std::pair<uint32_t, uint32_t> p2,
+ const float* JXL_RESTRICT rows[3],
+ size_t stride, float threshold) {
+ float v1[3], v2[3];
+ for (size_t c = 0; c < 3; c++) {
+ v1[c] = rows[c][p1.second * stride + p1.first];
+ v2[c] = rows[c][p2.second * stride + p2.first];
+ }
+ return pci.is_similar_v(v1, v2, threshold);
+ };
+
+ // Written from the pool workers below, hence atomic.
+ std::atomic<bool> has_screenshot_areas{false};
+ const size_t opsin_stride = opsin.PixelsPerRow();
+ // Row 0 of each plane; other rows are reached through `opsin_stride`.
+ const float* JXL_RESTRICT opsin_rows[3] = {opsin.ConstPlaneRow(0, 0),
+ opsin.ConstPlaneRow(1, 0),
+ opsin.ConstPlaneRow(2, 0)};
+
+ // True when all three channels of the two pixels differ by at most 1e-4.
+ auto is_same = [&opsin_rows, opsin_stride](std::pair<uint32_t, uint32_t> p1,
+ std::pair<uint32_t, uint32_t> p2) {
+ for (size_t c = 0; c < 3; c++) {
+ float v1 = opsin_rows[c][p1.second * opsin_stride + p1.first];
+ float v2 = opsin_rows[c][p2.second * opsin_stride + p2.first];
+ if (std::fabs(v1 - v2) > 1e-4) {
+ return false;
+ }
+ }
+ return true;
+ };
+
+ // Weighted-distance comparison of two `opsin` pixels with kSimilarThreshold.
+ auto is_similar = [&](std::pair<uint32_t, uint32_t> p1,
+ std::pair<uint32_t, uint32_t> p2) {
+ return is_similar_impl(p1, p2, opsin_rows, opsin_stride, kSimilarThreshold);
+ };
+
+ constexpr int64_t kPatchSide = 4;
+ constexpr int64_t kExtraSide = 4;
+
+ // Look for kPatchSide size squares, naturally aligned, that all have the same
+ // pixel values.
+ ImageB is_screenshot_like(DivCeil(opsin.xsize(), kPatchSide),
+ DivCeil(opsin.ysize(), kPatchSide));
+ ZeroFillImage(&is_screenshot_like);
+ uint8_t* JXL_RESTRICT screenshot_row = is_screenshot_like.Row(0);
+ const size_t screenshot_stride = is_screenshot_like.PixelsPerRow();
+ // Handles one row of blocks; only complete blocks are examined (the loop
+ // bounds use floor division), partial edge blocks keep their zero fill.
+ const auto process_row = [&](const uint32_t y, size_t /* thread */) {
+ for (uint64_t x = 0; x < opsin.xsize() / kPatchSide; x++) {
+ bool all_same = true;
+ // Every pixel of the block must match the block's top-left pixel.
+ // Note: the `break` below only leaves the ix loop; the remaining rows are
+ // still scanned, which does not change the result.
+ for (size_t iy = 0; iy < static_cast<size_t>(kPatchSide); iy++) {
+ for (size_t ix = 0; ix < static_cast<size_t>(kPatchSide); ix++) {
+ size_t cx = x * kPatchSide + ix;
+ size_t cy = y * kPatchSide + iy;
+ if (!is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) {
+ all_same = false;
+ break;
+ }
+ }
+ }
+ if (!all_same) continue;
+ // Count, within the block extended by kExtraSide on every side and
+ // clipped to the image, how many pixels match the block's top-left pixel.
+ size_t num = 0;
+ size_t num_same = 0;
+ for (int64_t iy = -kExtraSide; iy < kExtraSide + kPatchSide; iy++) {
+ for (int64_t ix = -kExtraSide; ix < kExtraSide + kPatchSide; ix++) {
+ int64_t cx = x * kPatchSide + ix;
+ int64_t cy = y * kPatchSide + iy;
+ if (cx < 0 || static_cast<uint64_t>(cx) >= opsin.xsize() || //
+ cy < 0 || static_cast<uint64_t>(cy) >= opsin.ysize()) {
+ continue;
+ }
+ num++;
+ if (is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) num_same++;
+ }
+ }
+ // Too few equal pixels nearby.
+ if (num_same * 8 < num * 7) continue;
+ screenshot_row[y * screenshot_stride + x] = 1;
+ has_screenshot_areas = true;
+ }
+ };
+ JXL_CHECK(RunOnPool(pool, 0, opsin.ysize() / kPatchSide, ThreadPool::NoInit,
+ process_row, "IsScreenshotLike"));
+
+ // TODO(veluca): also parallelize the rest of this function.
+ if (WantDebugOutput(aux_out)) {
+ aux_out->DumpPlaneNormalized("screenshot_like", is_screenshot_like);
+ }
+
+ // Neighbourhood radius of both flood fills below (1 => 8 neighbours).
+ constexpr int kSearchRadius = 1;
+
+ if (!ApplyOverride(state->cparams.patches, has_screenshot_areas)) {
+ return {};
+ }
+
+ // Search for "similar enough" pixels near the screenshot-like areas.
+ ImageB is_background(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&is_background);
+ Image3F background(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&background);
+ // Maximum Manhattan distance between a background pixel and its source.
+ constexpr size_t kDistanceLimit = 50;
+ float* JXL_RESTRICT background_rows[3] = {
+ background.PlaneRow(0, 0),
+ background.PlaneRow(1, 0),
+ background.PlaneRow(2, 0),
+ };
+ const size_t background_stride = background.PixelsPerRow();
+ uint8_t* JXL_RESTRICT is_background_row = is_background.Row(0);
+ const size_t is_background_stride = is_background.PixelsPerRow();
+ // FIFO work list of (pixel, source pixel) pairs; `queue_front` is the read
+ // index, entries are never removed until the final clear().
+ std::vector<
+ std::pair<std::pair<uint32_t, uint32_t>, std::pair<uint32_t, uint32_t>>>
+ queue;
+ size_t queue_front = 0;
+ // Seed: every pixel of a screenshot-like block is its own source.
+ for (size_t y = 0; y < opsin.ysize(); y++) {
+ for (size_t x = 0; x < opsin.xsize(); x++) {
+ if (!screenshot_row[screenshot_stride * (y / kPatchSide) +
+ (x / kPatchSide)])
+ continue;
+ queue.push_back({{x, y}, {x, y}});
+ }
+ }
+ // Breadth-first growth: a pixel becomes background the first time it is
+ // dequeued and records the colour of the source that reached it.
+ while (queue.size() != queue_front) {
+ std::pair<uint32_t, uint32_t> cur = queue[queue_front].first;
+ std::pair<uint32_t, uint32_t> src = queue[queue_front].second;
+ queue_front++;
+ if (is_background_row[cur.second * is_background_stride + cur.first])
+ continue;
+ is_background_row[cur.second * is_background_stride + cur.first] = 1;
+ for (size_t c = 0; c < 3; c++) {
+ background_rows[c][cur.second * background_stride + cur.first] =
+ opsin_rows[c][src.second * opsin_stride + src.first];
+ }
+ for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+ for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+ if (dx == 0 && dy == 0) continue;
+ int next_first = cur.first + dx;
+ int next_second = cur.second + dy;
+ if (next_first < 0 || next_second < 0 ||
+ static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+ static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+ continue;
+ }
+ if (static_cast<uint32_t>(
+ std::abs(next_first - static_cast<int>(src.first)) +
+ std::abs(next_second - static_cast<int>(src.second))) >
+ kDistanceLimit) {
+ continue;
+ }
+ std::pair<uint32_t, uint32_t> next{next_first, next_second};
+ // A neighbour is enqueued when it is similar to the source and either
+ // lies outside any screenshot-like block or matches the source exactly.
+ if (is_similar(src, next)) {
+ if (!screenshot_row[next.second / kPatchSide * screenshot_stride +
+ next.first / kPatchSide] ||
+ is_same(src, next)) {
+ if (!is_background_row[next.second * is_background_stride +
+ next.first])
+ queue.emplace_back(next, src);
+ }
+ }
+ }
+ }
+ }
+ queue.clear();
+
+ // `ccs` is only allocated and painted when debug output is requested.
+ ImageF ccs;
+ Rng rng(0);
+ bool paint_ccs = false;
+ if (WantDebugOutput(aux_out)) {
+ aux_out->DumpPlaneNormalized("is_background", is_background);
+ if (is_xyb) {
+ aux_out->DumpXybImage("background", background);
+ } else {
+ aux_out->DumpImage("background", background);
+ }
+ ccs = ImageF(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&ccs);
+ paint_ccs = true;
+ }
+
+ constexpr float kVerySimilarThreshold = 0.03f;
+ constexpr float kHasSimilarThreshold = 0.03f;
+
+ const float* JXL_RESTRICT const_background_rows[3] = {
+ background_rows[0], background_rows[1], background_rows[2]};
+ // Same comparison as is_similar, but on the `background` image and with
+ // the tighter kVerySimilarThreshold.
+ auto is_similar_b = [&](std::pair<int, int> p1, std::pair<int, int> p2) {
+ return is_similar_impl(p1, p2, const_background_rows, background_stride,
+ kVerySimilarThreshold);
+ };
+
+ // Minimum absolute quantized value a patch must reach to be kept.
+ constexpr int kMinPeak = 2;
+ // Margin added around a component's bounding box for the has_similar test.
+ constexpr int kHasSimilarRadius = 2;
+
+ std::vector<PatchInfo> info;
+
+ // Find small CC outside the "similar enough" areas, compute bounding boxes,
+ // and run heuristics to exclude some patches.
+ ImageB visited(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&visited);
+ uint8_t* JXL_RESTRICT visited_row = visited.Row(0);
+ const size_t visited_stride = visited.PixelsPerRow();
+ std::vector<std::pair<uint32_t, uint32_t>> cc;
+ std::vector<std::pair<uint32_t, uint32_t>> stack;
+ for (size_t y = 0; y < opsin.ysize(); y++) {
+ for (size_t x = 0; x < opsin.xsize(); x++) {
+ if (is_background_row[y * is_background_stride + x]) continue;
+ // Flood fill from (x, y). If the pixel was already visited the fill ends
+ // immediately with found_border == false and the pixel is skipped below.
+ cc.clear();
+ stack.clear();
+ stack.emplace_back(x, y);
+ size_t min_x = x;
+ size_t max_x = x;
+ size_t min_y = y;
+ size_t max_y = y;
+ // First background pixel found adjacent to the component; only read
+ // after found_border has been checked.
+ std::pair<uint32_t, uint32_t> reference;
+ bool found_border = false;
+ bool all_similar = true;
+ while (!stack.empty()) {
+ std::pair<uint32_t, uint32_t> cur = stack.back();
+ stack.pop_back();
+ if (visited_row[cur.second * visited_stride + cur.first]) continue;
+ visited_row[cur.second * visited_stride + cur.first] = 1;
+ if (cur.first < min_x) min_x = cur.first;
+ if (cur.first > max_x) max_x = cur.first;
+ if (cur.second < min_y) min_y = cur.second;
+ if (cur.second > max_y) max_y = cur.second;
+ if (paint_ccs) {
+ cc.push_back(cur);
+ }
+ for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+ for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+ if (dx == 0 && dy == 0) continue;
+ int next_first = static_cast<int32_t>(cur.first) + dx;
+ int next_second = static_cast<int32_t>(cur.second) + dy;
+ if (next_first < 0 || next_second < 0 ||
+ static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+ static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+ continue;
+ }
+ std::pair<uint32_t, uint32_t> next{next_first, next_second};
+ // Non-background neighbours extend the component; background
+ // neighbours are compared against the first one found.
+ if (!is_background_row[next.second * is_background_stride +
+ next.first]) {
+ stack.push_back(next);
+ } else {
+ if (!found_border) {
+ reference = next;
+ found_border = true;
+ } else {
+ if (!is_similar_b(next, reference)) all_similar = false;
+ }
+ }
+ }
+ }
+ }
+ // Reject components without a background neighbour, with a non-uniform
+ // border, or whose bounding box reaches kMaxPatchSize in either direction.
+ if (!found_border || !all_similar || max_x - min_x >= kMaxPatchSize ||
+ max_y - min_y >= kMaxPatchSize) {
+ continue;
+ }
+ size_t bpos = background_stride * reference.second + reference.first;
+ float ref[3] = {background_rows[0][bpos], background_rows[1][bpos],
+ background_rows[2][bpos]};
+ // Require at least one `opsin` pixel close to the background colour
+ // within the bounding box extended by kHasSimilarRadius (clipped to the
+ // image). The scan does not stop at the first match.
+ bool has_similar = false;
+ for (size_t iy = std::max<int>(
+ static_cast<int32_t>(min_y) - kHasSimilarRadius, 0);
+ iy < std::min(max_y + kHasSimilarRadius + 1, opsin.ysize()); iy++) {
+ for (size_t ix = std::max<int>(
+ static_cast<int32_t>(min_x) - kHasSimilarRadius, 0);
+ ix < std::min(max_x + kHasSimilarRadius + 1, opsin.xsize());
+ ix++) {
+ size_t opos = opsin_stride * iy + ix;
+ float px[3] = {opsin_rows[0][opos], opsin_rows[1][opos],
+ opsin_rows[2][opos]};
+ if (pci.is_similar_v(ref, px, kHasSimilarThreshold)) {
+ has_similar = true;
+ }
+ }
+ }
+ if (!has_similar) continue;
+ // Tentatively record the patch: the whole bounding box, stored as the
+ // difference from the background colour, both as floats and quantized.
+ info.emplace_back();
+ info.back().second.emplace_back(min_x, min_y);
+ QuantizedPatch& patch = info.back().first;
+ patch.xsize = max_x - min_x + 1;
+ patch.ysize = max_y - min_y + 1;
+ int max_value = 0;
+ for (size_t c : {1, 0, 2}) {
+ for (size_t iy = min_y; iy <= max_y; iy++) {
+ for (size_t ix = min_x; ix <= max_x; ix++) {
+ size_t offset = (iy - min_y) * patch.xsize + ix - min_x;
+ patch.fpixels[c][offset] =
+ opsin_rows[c][iy * opsin_stride + ix] - ref[c];
+ int val = pci.Quantize(patch.fpixels[c][offset], c);
+ patch.pixels[c][offset] = val;
+ if (std::abs(val) > max_value) max_value = std::abs(val);
+ }
+ }
+ }
+ // Drop patches whose largest quantized magnitude is below kMinPeak.
+ if (max_value < kMinPeak) {
+ info.pop_back();
+ continue;
+ }
+ if (paint_ccs) {
+ float cc_color = rng.UniformF(0.5, 1.0);
+ for (std::pair<uint32_t, uint32_t> p : cc) {
+ ccs.Row(p.second)[p.first] = cc_color;
+ }
+ }
+ }
+ }
+
+ if (paint_ccs) {
+ JXL_ASSERT(WantDebugOutput(aux_out));
+ aux_out->DumpPlaneNormalized("ccs", ccs);
+ }
+ if (info.empty()) {
+ return {};
+ }
+
+ // Remove duplicates.
+ // After sorting, equal patches are adjacent: their position lists are merged
+ // into slot `unique`, and a slot is only kept (unique advanced) once it has
+ // at least kMinPatchOccurrences positions; otherwise it is overwritten.
+ constexpr size_t kMinPatchOccurrences = 2;
+ std::sort(info.begin(), info.end());
+ size_t unique = 0;
+ for (size_t i = 1; i < info.size(); i++) {
+ if (info[i].first == info[unique].first) {
+ info[unique].second.insert(info[unique].second.end(),
+ info[i].second.begin(), info[i].second.end());
+ } else {
+ if (info[unique].second.size() >= kMinPatchOccurrences) {
+ unique++;
+ }
+ info[unique] = info[i];
+ }
+ }
+ if (info[unique].second.size() >= kMinPatchOccurrences) {
+ unique++;
+ }
+ info.resize(unique);
+
+ // Area, in pixels, of the largest remaining patch (0 if none remain).
+ size_t max_patch_size = 0;
+
+ for (size_t i = 0; i < info.size(); i++) {
+ size_t pixels = info[i].first.xsize * info[i].first.ysize;
+ if (pixels > max_patch_size) max_patch_size = pixels;
+ }
+
+ // don't use patches if all patches are smaller than this
+ constexpr size_t kMinMaxPatchSize = 20;
+ if (max_patch_size < kMinMaxPatchSize) return {};
+
+ return info;
+}
+
+} // namespace
+
+// Builds the patch dictionary for `opsin` and stores it in
+// `state->shared.image_features.patches`.
+//
+// Candidates come from FindTextLikePatches(); only when that yields nothing
+// and dots are enabled (cparams.dots override, speed tier and butteraugli
+// distance) is FindDotDictionary() tried instead. With no candidates the
+// function returns without modifying `state`.
+//
+// Otherwise the patches are packed without overlap into one reference image,
+// that image is encoded and decoded again by RoundtripPatchFrame() into
+// reference slot kPatchFrameReferenceId, and every occurrence is registered
+// as a kAdd patch on the colour channels (kNone on each extra channel).
+void FindBestPatchDictionary(const Image3F& opsin,
+                             PassesEncoderState* JXL_RESTRICT state,
+                             const JxlCmsInterface& cms, ThreadPool* pool,
+                             AuxOut* aux_out, bool is_xyb) {
+  std::vector<PatchInfo> info =
+      FindTextLikePatches(opsin, state, pool, aux_out, is_xyb);
+
+  // TODO(veluca): this doesn't work if both dots and patches are enabled.
+  // For now, since dots and patches are not likely to occur in the same kind of
+  // images, disable dots if some patches were found.
+  if (info.empty() &&
+      ApplyOverride(
+          state->cparams.dots,
+          state->cparams.speed_tier <= SpeedTier::kSquirrel &&
+              state->cparams.butteraugli_distance >= kMinButteraugliForDots)) {
+    info = FindDotDictionary(state->cparams, opsin, state->shared.cmap, pool);
+  }
+
+  if (info.empty()) return;
+
+  // Place the largest patches first.
+  std::sort(
+      info.begin(), info.end(), [](const PatchInfo& a, const PatchInfo& b) {
+        return a.first.xsize * a.first.ysize > b.first.xsize * b.first.ysize;
+      });
+
+  size_t max_x_size = 0;
+  size_t max_y_size = 0;
+  size_t total_pixels = 0;
+
+  for (size_t i = 0; i < info.size(); i++) {
+    size_t pixels = info[i].first.xsize * info[i].first.ysize;
+    if (max_x_size < info[i].first.xsize) max_x_size = info[i].first.xsize;
+    if (max_y_size < info[i].first.ysize) max_y_size = info[i].first.ysize;
+    total_pixels += pixels;
+  }
+
+  // Bin-packing & conversion of patches.
+  // Start from a square of the total patch area (at least as large as the
+  // biggest patch in each direction) and grow it until everything fits.
+  constexpr float kBinPackingSlackness = 1.05f;
+  size_t ref_xsize = std::max<float>(max_x_size, std::sqrt(total_pixels));
+  size_t ref_ysize = std::max<float>(max_y_size, std::sqrt(total_pixels));
+  std::vector<std::pair<size_t, size_t>> ref_positions(info.size());
+  // TODO(veluca): allow partial overlaps of patches that have the same pixels.
+  size_t max_y = 0;
+  do {
+    max_y = 0;
+    // Increase packed image size.
+    ref_xsize = ref_xsize * kBinPackingSlackness + 1;
+    ref_ysize = ref_ysize * kBinPackingSlackness + 1;
+
+    ImageB occupied(ref_xsize, ref_ysize);
+    ZeroFillImage(&occupied);
+    uint8_t* JXL_RESTRICT occupied_rows = occupied.Row(0);
+    size_t occupied_stride = occupied.PixelsPerRow();
+
+    bool success = true;
+    // For every patch...
+    for (size_t patch = 0; patch < info.size(); patch++) {
+      size_t x0 = 0;
+      size_t y0 = 0;
+      size_t xsize = info[patch].first.xsize;
+      size_t ysize = info[patch].first.ysize;
+      bool found = false;
+      // For every possible start position ...
+      for (; y0 + ysize <= ref_ysize; y0++) {
+        x0 = 0;
+        for (; x0 + xsize <= ref_xsize; x0++) {
+          bool has_occupied_pixel = false;
+          size_t x = x0;
+          // Check if it is possible to place the patch in this position in the
+          // reference frame. Stop at the FIRST occupied pixel so that `x`
+          // still holds its column afterwards; continuing with further rows
+          // (as a plain inner `break` would) overwrites `x` and can make the
+          // jump below skip valid placements.
+          for (size_t y = y0; y < y0 + ysize && !has_occupied_pixel; y++) {
+            for (x = x0; x < x0 + xsize; x++) {
+              if (occupied_rows[y * occupied_stride + x]) {
+                has_occupied_pixel = true;
+                break;
+              }
+            }
+          }  // end of positioning check
+          if (!has_occupied_pixel) {
+            found = true;
+            break;
+          }
+          // Every start column in (x0, x] would still cover the occupied
+          // pixel, so continue right after it (the loop increment adds 1).
+          x0 = x;  // Jump to next pixel after the occupied one.
+        }
+        if (found) break;
+      }  // end of start position checking
+
+      // We didn't find a possible position: repeat from the beginning with a
+      // larger reference frame size.
+      if (!found) {
+        success = false;
+        break;
+      }
+
+      // We found a position: mark the corresponding positions in the reference
+      // image as used.
+      ref_positions[patch] = {x0, y0};
+      for (size_t y = y0; y < y0 + ysize; y++) {
+        for (size_t x = x0; x < x0 + xsize; x++) {
+          occupied_rows[y * occupied_stride + x] = true;
+        }
+      }
+      max_y = std::max(max_y, y0 + ysize);
+    }
+
+    if (success) break;
+  } while (true);
+
+  JXL_ASSERT(ref_ysize >= max_y);
+
+  // Crop the reference image to the rows actually used.
+  ref_ysize = max_y;
+
+  Image3F reference_frame(ref_xsize, ref_ysize);
+  // TODO(veluca): figure out a better way to fill the image.
+  ZeroFillImage(&reference_frame);
+  std::vector<PatchPosition> positions;
+  std::vector<PatchReferencePosition> pref_positions;
+  std::vector<PatchBlending> blendings;
+  float* JXL_RESTRICT ref_rows[3] = {
+      reference_frame.PlaneRow(0, 0),
+      reference_frame.PlaneRow(1, 0),
+      reference_frame.PlaneRow(2, 0),
+  };
+  size_t ref_stride = reference_frame.PixelsPerRow();
+  size_t num_ec = state->shared.metadata->m.num_extra_channels;
+
+  for (size_t i = 0; i < info.size(); i++) {
+    PatchReferencePosition ref_pos;
+    ref_pos.xsize = info[i].first.xsize;
+    ref_pos.ysize = info[i].first.ysize;
+    ref_pos.x0 = ref_positions[i].first;
+    ref_pos.y0 = ref_positions[i].second;
+    ref_pos.ref = kPatchFrameReferenceId;
+    // Copy the unquantized patch pixels to their slot in the reference image.
+    for (size_t y = 0; y < ref_pos.ysize; y++) {
+      for (size_t x = 0; x < ref_pos.xsize; x++) {
+        for (size_t c = 0; c < 3; c++) {
+          ref_rows[c][(y + ref_pos.y0) * ref_stride + x + ref_pos.x0] =
+              info[i].first.fpixels[c][y * ref_pos.xsize + x];
+        }
+      }
+    }
+    // pref_positions.size() is the index `ref_pos` gets when appended below.
+    for (const auto& pos : info[i].second) {
+      positions.emplace_back(
+          PatchPosition{pos.first, pos.second, pref_positions.size()});
+      // Add blending for color channels, ignore other channels.
+      blendings.push_back({PatchBlendMode::kAdd, 0, false});
+      for (size_t j = 0; j < num_ec; ++j) {
+        blendings.push_back({PatchBlendMode::kNone, 0, false});
+      }
+    }
+    pref_positions.emplace_back(std::move(ref_pos));
+  }
+
+  CompressParams cparams = state->cparams;
+  // Recursive application of patches could create very weird issues.
+  cparams.patches = Override::kOff;
+
+  RoundtripPatchFrame(&reference_frame, state, kPatchFrameReferenceId, cparams,
+                      cms, pool, aux_out, /*subtract=*/true);
+
+  // TODO(veluca): this assumes that applying patches is commutative, which is
+  // not true for all blending modes. This code only produces kAdd patches, so
+  // this works out.
+  PatchDictionaryEncoder::SetPositions(
+      &state->shared.image_features.patches, std::move(positions),
+      std::move(pref_positions), std::move(blendings));
+}
+
+// Encodes `reference_frame` as a kReferenceOnly frame saved in reference slot
+// `idx`, appends the encoded bytes to `state->special_frames`, and installs
+// the frame in `state->shared.reference_frames[idx]`.
+//
+// `reference_frame` is moved from. `cparams` is modified in place, so the
+// caller's object changes: resampling is reset to 1, dots and noise are
+// disabled, modular mode with the gradient predictor is forced and the
+// progressive options are cleared.
+//
+// With `subtract` true the encoded bytes are decoded again and the decoded
+// frame is what gets installed; otherwise the unencoded input is installed
+// directly. When `aux_out` is non-null, the statistics of the encode are
+// merged into `aux_out->layers[kLayerDictionary]`. Encode and decode
+// failures abort through JXL_CHECK.
+void RoundtripPatchFrame(Image3F* reference_frame,
+                         PassesEncoderState* JXL_RESTRICT state, int idx,
+                         CompressParams& cparams, const JxlCmsInterface& cms,
+                         ThreadPool* pool, AuxOut* aux_out, bool subtract) {
+  FrameInfo patch_frame_info;
+  cparams.resampling = 1;
+  cparams.ec_resampling = 1;
+  cparams.dots = Override::kOff;
+  cparams.noise = Override::kOff;
+  cparams.modular_mode = true;
+  cparams.responsive = 0;
+  cparams.progressive_dc = 0;
+  cparams.progressive_mode = false;
+  cparams.qprogressive_mode = false;
+  // Use gradient predictor and not Predictor::Best.
+  cparams.options.predictor = Predictor::Gradient;
+  patch_frame_info.save_as_reference = idx;  // always saved.
+  patch_frame_info.frame_type = FrameType::kReferenceOnly;
+  patch_frame_info.save_before_color_transform = true;
+  ImageBundle ib(&state->shared.metadata->m);
+  // TODO(veluca): metadata.color_encoding is a lie: ib is in XYB, but there is
+  // no simple way to express that yet.
+  patch_frame_info.ib_needs_color_transform = false;
+  ib.SetFromImage(std::move(*reference_frame),
+                  state->shared.metadata->m.color_encoding);
+  if (!ib.metadata()->extra_channel_info.empty()) {
+    // Add dummy extra channels to the patch image: patch encoding does not yet
+    // support extra channels, but the codec expects that the amount of extra
+    // channels in frames matches that in the metadata of the codestream.
+    std::vector<ImageF> extra_channels;
+    extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+    for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+      extra_channels.emplace_back(ib.xsize(), ib.ysize());
+      // Must initialize the image with data to not affect blending with
+      // uninitialized memory.
+      // TODO(lode): patches must copy and use the real extra channels instead.
+      ZeroFillImage(&extra_channels.back());
+    }
+    ib.SetExtraChannels(std::move(extra_channels));
+  }
+  PassesEncoderState roundtrip_state;
+  auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+  AuxOut patch_aux_out;
+  JXL_CHECK(EncodeFrame(cparams, patch_frame_info, state->shared.metadata, ib,
+                        &roundtrip_state, cms, pool, special_frame.get(),
+                        aux_out ? &patch_aux_out : nullptr));
+  if (aux_out) {
+    for (const auto& l : patch_aux_out.layers) {
+      aux_out->layers[kLayerDictionary].Assimilate(l);
+    }
+  }
+  // Taken before the writer is handed over to `state`; the writer object
+  // itself is only moved by pointer into state->special_frames.
+  const Span<const uint8_t> encoded = special_frame->GetSpan();
+  state->special_frames.emplace_back(std::move(special_frame));
+  if (subtract) {
+    ImageBundle decoded(&state->shared.metadata->m);
+    PassesDecoderState dec_state;
+    JXL_CHECK(dec_state.output_encoding_info.SetFromMetadata(
+        *state->shared.metadata));
+    const uint8_t* frame_start = encoded.data();
+    size_t encoded_size = encoded.size();
+    JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size, &decoded,
+                          *state->shared.metadata));
+    frame_start += decoded.decoded_bytes();
+    encoded_size -= decoded.decoded_bytes();
+    size_t ref_xsize =
+        dec_state.shared_storage.reference_frames[idx].frame.color()->xsize();
+    // if the frame itself uses patches, we need to decode another frame
+    if (!ref_xsize) {
+      JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size,
+                            &decoded, *state->shared.metadata));
+      // Account for the bytes of the second frame as well; without this the
+      // check below could never pass on this path.
+      // NOTE(review): assumes decoded_bytes() reports the size of the frame
+      // decoded last, as the accounting after the first call does - confirm.
+      frame_start += decoded.decoded_bytes();
+      encoded_size -= decoded.decoded_bytes();
+    }
+    // Everything that was encoded must have been consumed.
+    JXL_CHECK(encoded_size == 0);
+    state->shared.reference_frames[idx] =
+        std::move(dec_state.shared_storage.reference_frames[idx]);
+  } else {
+    state->shared.reference_frames[idx].frame = std::move(ib);
+  }
+}
+
+} // namespace jxl